brotli 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +4 -0
  6. data/Gemfile +4 -0
  7. data/README.md +36 -0
  8. data/Rakefile +13 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +7 -0
  11. data/brotli.gemspec +28 -0
  12. data/ext/brotli/brotli.cc +67 -0
  13. data/ext/brotli/brotli.h +9 -0
  14. data/ext/brotli/extconf.rb +34 -0
  15. data/lib/brotli.rb +2 -0
  16. data/lib/brotli/version.rb +3 -0
  17. data/vendor/brotli/LICENSE +202 -0
  18. data/vendor/brotli/dec/Makefile +12 -0
  19. data/vendor/brotli/dec/bit_reader.c +55 -0
  20. data/vendor/brotli/dec/bit_reader.h +256 -0
  21. data/vendor/brotli/dec/context.h +260 -0
  22. data/vendor/brotli/dec/decode.c +1573 -0
  23. data/vendor/brotli/dec/decode.h +160 -0
  24. data/vendor/brotli/dec/dictionary.h +9494 -0
  25. data/vendor/brotli/dec/huffman.c +325 -0
  26. data/vendor/brotli/dec/huffman.h +77 -0
  27. data/vendor/brotli/dec/port.h +148 -0
  28. data/vendor/brotli/dec/prefix.h +756 -0
  29. data/vendor/brotli/dec/state.c +149 -0
  30. data/vendor/brotli/dec/state.h +185 -0
  31. data/vendor/brotli/dec/streams.c +99 -0
  32. data/vendor/brotli/dec/streams.h +100 -0
  33. data/vendor/brotli/dec/transform.h +315 -0
  34. data/vendor/brotli/dec/types.h +36 -0
  35. data/vendor/brotli/enc/Makefile +11 -0
  36. data/vendor/brotli/enc/backward_references.cc +769 -0
  37. data/vendor/brotli/enc/backward_references.h +50 -0
  38. data/vendor/brotli/enc/bit_cost.h +147 -0
  39. data/vendor/brotli/enc/block_splitter.cc +418 -0
  40. data/vendor/brotli/enc/block_splitter.h +78 -0
  41. data/vendor/brotli/enc/brotli_bit_stream.cc +884 -0
  42. data/vendor/brotli/enc/brotli_bit_stream.h +149 -0
  43. data/vendor/brotli/enc/cluster.h +290 -0
  44. data/vendor/brotli/enc/command.h +140 -0
  45. data/vendor/brotli/enc/context.h +185 -0
  46. data/vendor/brotli/enc/dictionary.h +9485 -0
  47. data/vendor/brotli/enc/dictionary_hash.h +4125 -0
  48. data/vendor/brotli/enc/encode.cc +715 -0
  49. data/vendor/brotli/enc/encode.h +196 -0
  50. data/vendor/brotli/enc/encode_parallel.cc +354 -0
  51. data/vendor/brotli/enc/encode_parallel.h +37 -0
  52. data/vendor/brotli/enc/entropy_encode.cc +492 -0
  53. data/vendor/brotli/enc/entropy_encode.h +88 -0
  54. data/vendor/brotli/enc/fast_log.h +179 -0
  55. data/vendor/brotli/enc/find_match_length.h +87 -0
  56. data/vendor/brotli/enc/hash.h +686 -0
  57. data/vendor/brotli/enc/histogram.cc +76 -0
  58. data/vendor/brotli/enc/histogram.h +100 -0
  59. data/vendor/brotli/enc/literal_cost.cc +172 -0
  60. data/vendor/brotli/enc/literal_cost.h +38 -0
  61. data/vendor/brotli/enc/metablock.cc +544 -0
  62. data/vendor/brotli/enc/metablock.h +88 -0
  63. data/vendor/brotli/enc/port.h +151 -0
  64. data/vendor/brotli/enc/prefix.h +85 -0
  65. data/vendor/brotli/enc/ringbuffer.h +108 -0
  66. data/vendor/brotli/enc/static_dict.cc +441 -0
  67. data/vendor/brotli/enc/static_dict.h +40 -0
  68. data/vendor/brotli/enc/static_dict_lut.h +12063 -0
  69. data/vendor/brotli/enc/streams.cc +127 -0
  70. data/vendor/brotli/enc/streams.h +129 -0
  71. data/vendor/brotli/enc/transform.h +250 -0
  72. data/vendor/brotli/enc/write_bits.h +91 -0
  73. metadata +171 -0
@@ -0,0 +1,715 @@
1
+ // Copyright 2013 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Implementation of Brotli compressor.
16
+
17
+ #include "./encode.h"
18
+
19
+ #include <algorithm>
20
+ #include <limits>
21
+
22
+ #include "./backward_references.h"
23
+ #include "./bit_cost.h"
24
+ #include "./block_splitter.h"
25
+ #include "./brotli_bit_stream.h"
26
+ #include "./cluster.h"
27
+ #include "./context.h"
28
+ #include "./metablock.h"
29
+ #include "./transform.h"
30
+ #include "./entropy_encode.h"
31
+ #include "./fast_log.h"
32
+ #include "./hash.h"
33
+ #include "./histogram.h"
34
+ #include "./literal_cost.h"
35
+ #include "./prefix.h"
36
+ #include "./write_bits.h"
37
+
38
+ namespace brotli {
39
+
40
+ static const double kMinUTF8Ratio = 0.75;  // min_fraction handed to IsMostlyUTF8 when WriteBrotliData picks UTF-8 mode
41
+ static const int kMinQualityForBlockSplit = 4;  // below this quality meta-blocks are written with StoreMetaBlockTrivial
42
+ static const int kMinQualityForContextModeling = 5;  // below this, DecideOverLiteralContextModeling returns without changes
43
+ static const int kMinQualityForOptimizeHistograms = 4;  // from this quality up, OptimizeHistograms runs on the block split
44
+
45
// Decodes one UTF-8 sequence starting at input[0].
//
//   symbol: receives the decoded code point, or 0x110000 | input[0] when the
//           bytes do not form an accepted sequence (a marker above the
//           Unicode code space).
//   input:  at least one readable byte; at most `size` bytes are examined.
//   size:   number of bytes available at `input`.
//
// Returns the number of bytes consumed (1..4). A NUL byte, an overlong
// encoding, a truncated sequence, or a 4-byte value above 0x10ffff are all
// reported as a 1-byte non-UTF-8 symbol.
int ParseAsUTF8(int* symbol, const uint8_t* input, int size) {
  const int lead = input[0];
  int code;

  if ((lead & 0x80) == 0) {
    // ASCII; the zero byte is deliberately not accepted.
    if (lead > 0) {
      *symbol = lead;
      return 1;
    }
  } else if ((lead & 0xe0) == 0xc0) {
    // 2-byte UTF-8
    if (size > 1 && (input[1] & 0xc0) == 0x80) {
      code = ((lead & 0x1f) << 6) | (input[1] & 0x3f);
      if (code > 0x7f) {
        *symbol = code;
        return 2;
      }
    }
  } else if ((lead & 0xf0) == 0xe0) {
    // 3-byte UTF-8
    if (size > 2 &&
        (input[1] & 0xc0) == 0x80 &&
        (input[2] & 0xc0) == 0x80) {
      code = ((lead & 0x0f) << 12) |
             ((input[1] & 0x3f) << 6) |
             (input[2] & 0x3f);
      if (code > 0x7ff) {
        *symbol = code;
        return 3;
      }
    }
  } else if ((lead & 0xf8) == 0xf0) {
    // 4-byte UTF-8
    if (size > 3 &&
        (input[1] & 0xc0) == 0x80 &&
        (input[2] & 0xc0) == 0x80 &&
        (input[3] & 0xc0) == 0x80) {
      code = ((lead & 0x07) << 18) |
             ((input[1] & 0x3f) << 12) |
             ((input[2] & 0x3f) << 6) |
             (input[3] & 0x3f);
      if (code > 0xffff && code <= 0x10ffff) {
        *symbol = code;
        return 4;
      }
    }
  }

  // Not UTF-8: emit a special symbol above the UTF-8 code space.
  *symbol = 0x110000 | lead;
  return 1;
}
93
+
94
+ // Returns true if at least min_fraction of the data is UTF8-encoded.
95
+ bool IsMostlyUTF8(const uint8_t* data, size_t length, double min_fraction) {
96
+ size_t size_utf8 = 0;
97
+ size_t pos = 0;
98
+ while (pos < length) {
99
+ int symbol;
100
+ int bytes_read = ParseAsUTF8(&symbol, data + pos, length - pos);
101
+ pos += bytes_read;
102
+ if (symbol < 0x110000) size_utf8 += bytes_read;
103
+ }
104
+ return size_utf8 > min_fraction * length;
105
+ }
106
+
107
+ void RecomputeDistancePrefixes(Command* cmds,
108
+ size_t num_commands,
109
+ int num_direct_distance_codes,
110
+ int distance_postfix_bits) {
111
+ if (num_direct_distance_codes == 0 && distance_postfix_bits == 0) {
112
+ return;
113
+ }
114
+ for (int i = 0; i < num_commands; ++i) {
115
+ Command* cmd = &cmds[i];
116
+ if (cmd->copy_len_ > 0 && cmd->cmd_prefix_ >= 128) {
117
+ PrefixEncodeCopyDistance(cmd->DistanceCode(),
118
+ num_direct_distance_codes,
119
+ distance_postfix_bits,
120
+ &cmd->dist_prefix_,
121
+ &cmd->dist_extra_);
122
+ }
123
+ }
124
+ }
125
+
126
+ uint8_t* BrotliCompressor::GetBrotliStorage(size_t size) {
127
+ if (storage_size_ < size) {
128
+ storage_.reset(new uint8_t[size]);
129
+ storage_size_ = size;
130
+ }
131
+ return &storage_[0];
132
+ }
133
+
134
+ BrotliCompressor::BrotliCompressor(BrotliParams params)
135
+ : params_(params),
136
+ hashers_(new Hashers()),
137
+ input_pos_(0),
138
+ num_commands_(0),
139
+ num_literals_(0),
140
+ last_insert_len_(0),
141
+ last_flush_pos_(0),
142
+ last_processed_pos_(0),
143
+ prev_byte_(0),
144
+ prev_byte2_(0),
145
+ storage_size_(0) {
146
+ // Sanitize params.
147
+ params_.quality = std::max(1, params_.quality);
148
+ if (params_.lgwin < kMinWindowBits) {
149
+ params_.lgwin = kMinWindowBits;
150
+ } else if (params_.lgwin > kMaxWindowBits) {
151
+ params_.lgwin = kMaxWindowBits;
152
+ }
153
+ if (params_.lgblock == 0) {
154
+ params_.lgblock = params_.quality < kMinQualityForBlockSplit ? 14 : 16;
155
+ if (params_.quality >= 9 && params_.lgwin > params_.lgblock) {
156
+ params_.lgblock = std::min(21, params_.lgwin);
157
+ }
158
+ } else {
159
+ params_.lgblock = std::min(kMaxInputBlockBits,
160
+ std::max(kMinInputBlockBits, params_.lgblock));
161
+ }
162
+
163
+ // Set maximum distance, see section 9.1. of the spec.
164
+ max_backward_distance_ = (1 << params_.lgwin) - 16;
165
+
166
+ // Initialize input and literal cost ring buffers.
167
+ // We allocate at least lgwin + 1 bits for the ring buffer so that the newly
168
+ // added block fits there completely and we still get lgwin bits and at least
169
+ // read_block_size_bits + 1 bits because the copy tail length needs to be
170
+ // smaller than ringbuffer size.
171
+ int ringbuffer_bits = std::max(params_.lgwin + 1, params_.lgblock + 1);
172
+ ringbuffer_.reset(new RingBuffer(ringbuffer_bits, params_.lgblock));
173
+ if (params_.quality > 9) {
174
+ literal_cost_mask_ = (1 << params_.lgblock) - 1;
175
+ literal_cost_.reset(new float[literal_cost_mask_ + 1]);
176
+ }
177
+
178
+ // Allocate command buffer.
179
+ cmd_buffer_size_ = std::max(1 << 18, 1 << params_.lgblock);
180
+ commands_.reset(new brotli::Command[cmd_buffer_size_]);
181
+
182
+ // Initialize last byte with stream header.
183
+ if (params_.lgwin == 16) {
184
+ last_byte_ = 0;
185
+ last_byte_bits_ = 1;
186
+ } else if (params_.lgwin == 17) {
187
+ last_byte_ = 1;
188
+ last_byte_bits_ = 7;
189
+ } else if (params_.lgwin > 17) {
190
+ last_byte_ = ((params_.lgwin - 17) << 1) | 1;
191
+ last_byte_bits_ = 4;
192
+ } else {
193
+ last_byte_ = ((params_.lgwin - 8) << 4) | 1;
194
+ last_byte_bits_ = 7;
195
+ }
196
+
197
+ // Initialize distance cache.
198
+ dist_cache_[0] = 4;
199
+ dist_cache_[1] = 11;
200
+ dist_cache_[2] = 15;
201
+ dist_cache_[3] = 16;
202
+ // Save the state of the distance cache in case we need to restore it for
203
+ // emitting an uncompressed block.
204
+ memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
205
+
206
+ // Initialize hashers.
207
+ hash_type_ = std::min(9, params_.quality);
208
+ hashers_->Init(hash_type_);
209
+ }
210
+
211
+ BrotliCompressor::~BrotliCompressor() {
212
+ }
213
+
214
+ void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
215
+ const uint8_t* input_buffer) {
216
+ ringbuffer_->Write(input_buffer, input_size);
217
+ input_pos_ += input_size;
218
+
219
+ // Erase a few more bytes in the ring buffer to make hashing not
220
+ // depend on uninitialized data. This makes compression deterministic
221
+ // and it prevents uninitialized memory warnings in Valgrind. Even
222
+ // without erasing, the output would be valid (but nondeterministic).
223
+ //
224
+ // Background information: The compressor stores short (at most 8 bytes)
225
+ // substrings of the input already read in a hash table, and detects
226
+ // repetitions by looking up such substrings in the hash table. If it
227
+ // can find a substring, it checks whether the substring is really there
228
+ // in the ring buffer (or it's just a hash collision). Should the hash
229
+ // table become corrupt, this check makes sure that the output is
230
+ // still valid, albeit the compression ratio would be bad.
231
+ //
232
+ // The compressor populates the hash table from the ring buffer as it's
233
+ // reading new bytes from the input. However, at the last few indexes of
234
+ // the ring buffer, there are not enough bytes to build full-length
235
+ // substrings from. Since the hash table always contains full-length
236
+ // substrings, we erase with dummy 0s here to make sure that those
237
+ // substrings will contain 0s at the end instead of uninitialized
238
+ // data.
239
+ //
240
+ // Please note that erasing is not necessary (because the
241
+ // memory region is already initialized since he ring buffer
242
+ // has a `tail' that holds a copy of the beginning,) so we
243
+ // skip erasing if we have already gone around at least once in
244
+ // the ring buffer.
245
+ size_t pos = ringbuffer_->position();
246
+ // Only clear during the first round of ringbuffer writes. On
247
+ // subsequent rounds data in the ringbuffer would be affected.
248
+ if (pos <= ringbuffer_->mask()) {
249
+ // This is the first time when the ring buffer is being written.
250
+ // We clear 3 bytes just after the bytes that have been copied from
251
+ // the input buffer.
252
+ //
253
+ // The ringbuffer has a "tail" that holds a copy of the beginning,
254
+ // but only once the ring buffer has been fully written once, i.e.,
255
+ // pos <= mask. For the first time, we need to write values
256
+ // in this tail (where index may be larger than mask), so that
257
+ // we have exactly defined behavior and don't read un-initialized
258
+ // memory. Due to performance reasons, hashing reads data using a
259
+ // LOAD32, which can go 3 bytes beyond the bytes written in the
260
+ // ringbuffer.
261
+ memset(ringbuffer_->start() + pos, 0, 3);
262
+ }
263
+ }
264
+
265
+ void BrotliCompressor::BrotliSetCustomDictionary(
266
+ const size_t size, const uint8_t* dict) {
267
+ CopyInputToRingBuffer(size, dict);
268
+ last_flush_pos_ = size;
269
+ last_processed_pos_ = size;
270
+ if (size > 0) {
271
+ prev_byte_ = dict[size - 1];
272
+ }
273
+ if (size > 1) {
274
+ prev_byte2_ = dict[size - 2];
275
+ }
276
+ hashers_->PrependCustomDictionary(hash_type_, size, dict);
277
+ }
278
+
279
+ bool BrotliCompressor::WriteBrotliData(const bool is_last,
280
+ const bool force_flush,
281
+ size_t* out_size,
282
+ uint8_t** output) {
283
+ const size_t bytes = input_pos_ - last_processed_pos_;
284
+ const uint8_t* data = ringbuffer_->start();
285
+ const size_t mask = ringbuffer_->mask();
286
+
287
+ if (bytes > input_block_size()) {
288
+ return false;
289
+ }
290
+
291
+ bool utf8_mode =
292
+ params_.quality >= 9 &&
293
+ IsMostlyUTF8(&data[last_processed_pos_ & mask], bytes, kMinUTF8Ratio);
294
+
295
+ if (literal_cost_.get()) {
296
+ if (utf8_mode) {
297
+ EstimateBitCostsForLiteralsUTF8(last_processed_pos_, bytes, mask,
298
+ literal_cost_mask_, data,
299
+ literal_cost_.get());
300
+ } else {
301
+ EstimateBitCostsForLiterals(last_processed_pos_, bytes, mask,
302
+ literal_cost_mask_,
303
+ data, literal_cost_.get());
304
+ }
305
+ }
306
+ CreateBackwardReferences(bytes, last_processed_pos_, data, mask,
307
+ literal_cost_.get(),
308
+ literal_cost_mask_,
309
+ max_backward_distance_,
310
+ params_.quality,
311
+ hashers_.get(),
312
+ hash_type_,
313
+ dist_cache_,
314
+ &last_insert_len_,
315
+ &commands_[num_commands_],
316
+ &num_commands_,
317
+ &num_literals_);
318
+
319
+ // For quality 1 there is no block splitting, so we buffer at most this much
320
+ // literals and commands.
321
+ static const int kMaxNumDelayedSymbols = 0x2fff;
322
+ int max_length = std::min<int>(mask + 1, 1 << kMaxInputBlockBits);
323
+ if (!is_last && !force_flush &&
324
+ (params_.quality >= kMinQualityForBlockSplit ||
325
+ (num_literals_ + num_commands_ < kMaxNumDelayedSymbols)) &&
326
+ num_commands_ + (input_block_size() >> 1) < cmd_buffer_size_ &&
327
+ input_pos_ + input_block_size() <= last_flush_pos_ + max_length) {
328
+ // Everything will happen later.
329
+ last_processed_pos_ = input_pos_;
330
+ *out_size = 0;
331
+ return true;
332
+ }
333
+
334
+ // Create the last insert-only command.
335
+ if (last_insert_len_ > 0) {
336
+ brotli::Command cmd(last_insert_len_);
337
+ commands_[num_commands_++] = cmd;
338
+ num_literals_ += last_insert_len_;
339
+ last_insert_len_ = 0;
340
+ }
341
+
342
+ return WriteMetaBlockInternal(is_last, utf8_mode, out_size, output);
343
+ }
344
+
345
+ // Decide about the context map based on the ability of the prediction
346
+ // ability of the previous byte UTF8-prefix on the next byte. The
347
+ // prediction ability is calculated as shannon entropy. Here we need
348
+ // shannon entropy instead of 'BitsEntropy' since the prefix will be
349
+ // encoded with the remaining 6 bits of the following byte, and
350
+ // BitsEntropy will assume that symbol to be stored alone using Huffman
351
+ // coding.
352
+ void ChooseContextMap(int quality,
353
+ int* bigram_histo,
354
+ int* num_literal_contexts,
355
+ const int** literal_context_map) {
356
+ int monogram_histo[3] = { 0 };
357
+ int two_prefix_histo[6] = { 0 };
358
+ int total = 0;
359
+ for (int i = 0; i < 9; ++i) {
360
+ total += bigram_histo[i];
361
+ monogram_histo[i % 3] += bigram_histo[i];
362
+ int j = i;
363
+ if (j >= 6) {
364
+ j -= 6;
365
+ }
366
+ two_prefix_histo[j] += bigram_histo[i];
367
+ }
368
+ int dummy;
369
+ double entropy1 = ShannonEntropy(monogram_histo, 3, &dummy);
370
+ double entropy2 = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
371
+ ShannonEntropy(two_prefix_histo + 3, 3, &dummy));
372
+ double entropy3 = 0;
373
+ for (int k = 0; k < 3; ++k) {
374
+ entropy3 += ShannonEntropy(bigram_histo + 3 * k, 3, &dummy);
375
+ }
376
+ entropy1 *= (1.0 / total);
377
+ entropy2 *= (1.0 / total);
378
+ entropy3 *= (1.0 / total);
379
+
380
+ static const int kStaticContextMapContinuation[64] = {
381
+ 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
385
+ };
386
+ static const int kStaticContextMapSimpleUTF8[64] = {
387
+ 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
388
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
389
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
390
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391
+ };
392
+ if (quality < 7) {
393
+ // 3 context models is a bit slower, don't use it at lower qualities.
394
+ entropy3 = entropy1 * 10;
395
+ }
396
+ // If expected savings by symbol are less than 0.2 bits, skip the
397
+ // context modeling -- in exchange for faster decoding speed.
398
+ if (entropy1 - entropy2 < 0.2 &&
399
+ entropy1 - entropy3 < 0.2) {
400
+ *num_literal_contexts = 1;
401
+ } else if (entropy2 - entropy3 < 0.02) {
402
+ *num_literal_contexts = 2;
403
+ *literal_context_map = kStaticContextMapSimpleUTF8;
404
+ } else {
405
+ *num_literal_contexts = 3;
406
+ *literal_context_map = kStaticContextMapContinuation;
407
+ }
408
+ }
409
+
410
+ void DecideOverLiteralContextModeling(const uint8_t* input,
411
+ size_t start_pos,
412
+ size_t length,
413
+ size_t mask,
414
+ int quality,
415
+ int* literal_context_mode,
416
+ int* num_literal_contexts,
417
+ const int** literal_context_map) {
418
+ if (quality < kMinQualityForContextModeling || length < 64) {
419
+ return;
420
+ }
421
+ // Gather bigram data of the UTF8 byte prefixes. To make the analysis of
422
+ // UTF8 data faster we only examine 64 byte long strides at every 4kB
423
+ // intervals.
424
+ const size_t end_pos = start_pos + length;
425
+ int bigram_prefix_histo[9] = { 0 };
426
+ for (; start_pos + 64 < end_pos; start_pos += 4096) {
427
+ static const int lut[4] = { 0, 0, 1, 2 };
428
+ const size_t stride_end_pos = start_pos + 64;
429
+ int prev = lut[input[start_pos & mask] >> 6] * 3;
430
+ for (size_t pos = start_pos + 1; pos < stride_end_pos; ++pos) {
431
+ const uint8_t literal = input[pos & mask];
432
+ ++bigram_prefix_histo[prev + lut[literal >> 6]];
433
+ prev = lut[literal >> 6] * 3;
434
+ }
435
+ }
436
+ *literal_context_mode = CONTEXT_UTF8;
437
+ ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
438
+ literal_context_map);
439
+ }
440
+
441
+ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
442
+ const bool utf8_mode,
443
+ size_t* out_size,
444
+ uint8_t** output) {
445
+ const size_t bytes = input_pos_ - last_flush_pos_;
446
+ const uint8_t* data = ringbuffer_->start();
447
+ const size_t mask = ringbuffer_->mask();
448
+ const size_t max_out_size = 2 * bytes + 500;
449
+ uint8_t* storage = GetBrotliStorage(max_out_size);
450
+ storage[0] = last_byte_;
451
+ int storage_ix = last_byte_bits_;
452
+
453
+ bool uncompressed = false;
454
+ if (num_commands_ < (bytes >> 8) + 2) {
455
+ if (num_literals_ > 0.99 * bytes) {
456
+ int literal_histo[256] = { 0 };
457
+ static const int kSampleRate = 13;
458
+ static const double kMinEntropy = 7.92;
459
+ static const double kBitCostThreshold = bytes * kMinEntropy / kSampleRate;
460
+ for (int i = last_flush_pos_; i < input_pos_; i += kSampleRate) {
461
+ ++literal_histo[data[i & mask]];
462
+ }
463
+ if (BitsEntropy(literal_histo, 256) > kBitCostThreshold) {
464
+ uncompressed = true;
465
+ }
466
+ }
467
+ }
468
+
469
+ if (bytes == 0) {
470
+ if (!StoreCompressedMetaBlockHeader(is_last, 0, &storage_ix, &storage[0])) {
471
+ return false;
472
+ }
473
+ storage_ix = (storage_ix + 7) & ~7;
474
+ } else if (uncompressed) {
475
+ // Restore the distance cache, as its last update by
476
+ // CreateBackwardReferences is now unused.
477
+ memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
478
+ if (!StoreUncompressedMetaBlock(is_last,
479
+ data, last_flush_pos_, mask, bytes,
480
+ &storage_ix,
481
+ &storage[0])) {
482
+ return false;
483
+ }
484
+ } else {
485
+ int num_direct_distance_codes = 0;
486
+ int distance_postfix_bits = 0;
487
+ if (params_.quality > 9 && params_.mode == BrotliParams::MODE_FONT) {
488
+ num_direct_distance_codes = 12;
489
+ distance_postfix_bits = 1;
490
+ RecomputeDistancePrefixes(commands_.get(),
491
+ num_commands_,
492
+ num_direct_distance_codes,
493
+ distance_postfix_bits);
494
+ }
495
+ if (params_.quality < kMinQualityForBlockSplit) {
496
+ if (!StoreMetaBlockTrivial(data, last_flush_pos_, bytes, mask, is_last,
497
+ commands_.get(), num_commands_,
498
+ &storage_ix,
499
+ &storage[0])) {
500
+ return false;
501
+ }
502
+ } else {
503
+ MetaBlockSplit mb;
504
+ int literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
505
+ if (params_.quality <= 9) {
506
+ int num_literal_contexts = 1;
507
+ const int* literal_context_map = NULL;
508
+ DecideOverLiteralContextModeling(data, last_flush_pos_, bytes, mask,
509
+ params_.quality,
510
+ &literal_context_mode,
511
+ &num_literal_contexts,
512
+ &literal_context_map);
513
+ if (literal_context_map == NULL) {
514
+ BuildMetaBlockGreedy(data, last_flush_pos_, mask,
515
+ commands_.get(), num_commands_,
516
+ &mb);
517
+ } else {
518
+ BuildMetaBlockGreedyWithContexts(data, last_flush_pos_, mask,
519
+ prev_byte_, prev_byte2_,
520
+ literal_context_mode,
521
+ num_literal_contexts,
522
+ literal_context_map,
523
+ commands_.get(), num_commands_,
524
+ &mb);
525
+ }
526
+ } else {
527
+ BuildMetaBlock(data, last_flush_pos_, mask,
528
+ prev_byte_, prev_byte2_,
529
+ commands_.get(), num_commands_,
530
+ literal_context_mode,
531
+ &mb);
532
+ }
533
+ if (params_.quality >= kMinQualityForOptimizeHistograms) {
534
+ OptimizeHistograms(num_direct_distance_codes,
535
+ distance_postfix_bits,
536
+ &mb);
537
+ }
538
+ if (!StoreMetaBlock(data, last_flush_pos_, bytes, mask,
539
+ prev_byte_, prev_byte2_,
540
+ is_last,
541
+ num_direct_distance_codes,
542
+ distance_postfix_bits,
543
+ literal_context_mode,
544
+ commands_.get(), num_commands_,
545
+ mb,
546
+ &storage_ix,
547
+ &storage[0])) {
548
+ return false;
549
+ }
550
+ }
551
+ if (bytes + 4 < (storage_ix >> 3)) {
552
+ // Restore the distance cache and last byte.
553
+ memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
554
+ storage[0] = last_byte_;
555
+ storage_ix = last_byte_bits_;
556
+ if (!StoreUncompressedMetaBlock(is_last, data, last_flush_pos_, mask,
557
+ bytes, &storage_ix, &storage[0])) {
558
+ return false;
559
+ }
560
+ }
561
+ }
562
+ last_byte_ = storage[storage_ix >> 3];
563
+ last_byte_bits_ = storage_ix & 7;
564
+ last_flush_pos_ = input_pos_;
565
+ last_processed_pos_ = input_pos_;
566
+ prev_byte_ = data[(last_flush_pos_ - 1) & mask];
567
+ prev_byte2_ = data[(last_flush_pos_ - 2) & mask];
568
+ num_commands_ = 0;
569
+ num_literals_ = 0;
570
+ // Save the state of the distance cache in case we need to restore it for
571
+ // emitting an uncompressed block.
572
+ memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
573
+ *output = &storage[0];
574
+ *out_size = storage_ix >> 3;
575
+ return true;
576
+ }
577
+
578
+ bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
579
+ const uint8_t* input_buffer,
580
+ const bool is_last,
581
+ size_t* encoded_size,
582
+ uint8_t* encoded_buffer) {
583
+ CopyInputToRingBuffer(input_size, input_buffer);
584
+ size_t out_size = 0;
585
+ uint8_t* output;
586
+ if (!WriteBrotliData(is_last, /* force_flush = */ true, &out_size, &output) ||
587
+ out_size > *encoded_size) {
588
+ return false;
589
+ }
590
+ if (out_size > 0) {
591
+ memcpy(encoded_buffer, output, out_size);
592
+ }
593
+ *encoded_size = out_size;
594
+ return true;
595
+ }
596
+
597
+ bool BrotliCompressor::WriteMetadata(const size_t input_size,
598
+ const uint8_t* input_buffer,
599
+ const bool is_last,
600
+ size_t* encoded_size,
601
+ uint8_t* encoded_buffer) {
602
+ if (input_size > (1 << 24) || input_size + 6 > *encoded_size) {
603
+ return false;
604
+ }
605
+ int storage_ix = last_byte_bits_;
606
+ encoded_buffer[0] = last_byte_;
607
+ WriteBits(1, 0, &storage_ix, encoded_buffer);
608
+ WriteBits(2, 3, &storage_ix, encoded_buffer);
609
+ WriteBits(1, 0, &storage_ix, encoded_buffer);
610
+ if (input_size == 0) {
611
+ WriteBits(2, 0, &storage_ix, encoded_buffer);
612
+ *encoded_size = (storage_ix + 7) >> 3;
613
+ } else {
614
+ size_t nbits = Log2Floor(input_size - 1) + 1;
615
+ size_t nbytes = (nbits + 7) / 8;
616
+ WriteBits(2, nbytes, &storage_ix, encoded_buffer);
617
+ WriteBits(8 * nbytes, input_size - 1, &storage_ix, encoded_buffer);
618
+ size_t hdr_size = (storage_ix + 7) >> 3;
619
+ memcpy(&encoded_buffer[hdr_size], input_buffer, input_size);
620
+ *encoded_size = hdr_size + input_size;
621
+ }
622
+ if (is_last) {
623
+ encoded_buffer[(*encoded_size)++] = 3;
624
+ }
625
+ last_byte_ = 0;
626
+ last_byte_bits_ = 0;
627
+ return true;
628
+ }
629
+
630
+ bool BrotliCompressor::FinishStream(
631
+ size_t* encoded_size, uint8_t* encoded_buffer) {
632
+ return WriteMetaBlock(0, NULL, true, encoded_size, encoded_buffer);
633
+ }
634
+
635
+ int BrotliCompressBuffer(BrotliParams params,
636
+ size_t input_size,
637
+ const uint8_t* input_buffer,
638
+ size_t* encoded_size,
639
+ uint8_t* encoded_buffer) {
640
+ if (*encoded_size == 0) {
641
+ // Output buffer needs at least one byte.
642
+ return 0;
643
+ }
644
+ BrotliCompressor compressor(params);
645
+ BrotliMemIn in(input_buffer, input_size);
646
+ BrotliMemOut out(encoded_buffer, *encoded_size);
647
+ if (!BrotliCompress(params, &in, &out)) {
648
+ return 0;
649
+ }
650
+ *encoded_size = out.position();
651
+ return 1;
652
+ }
653
+
654
+ size_t CopyOneBlockToRingBuffer(BrotliIn* r, BrotliCompressor* compressor) {
655
+ const size_t block_size = compressor->input_block_size();
656
+ size_t bytes_read = 0;
657
+ const uint8_t* data = reinterpret_cast<const uint8_t*>(
658
+ r->Read(block_size, &bytes_read));
659
+ if (data == NULL) {
660
+ return 0;
661
+ }
662
+ compressor->CopyInputToRingBuffer(bytes_read, data);
663
+
664
+ // Read more bytes until block_size is filled or an EOF (data == NULL) is
665
+ // received. This is useful to get deterministic compressed output for the
666
+ // same input no matter how r->Read splits the input to chunks.
667
+ for (size_t remaining = block_size - bytes_read; remaining > 0; ) {
668
+ size_t more_bytes_read = 0;
669
+ data = reinterpret_cast<const uint8_t*>(
670
+ r->Read(remaining, &more_bytes_read));
671
+ if (data == NULL) {
672
+ break;
673
+ }
674
+ compressor->CopyInputToRingBuffer(more_bytes_read, data);
675
+ bytes_read += more_bytes_read;
676
+ remaining -= more_bytes_read;
677
+ }
678
+ return bytes_read;
679
+ }
680
+
681
+ bool BrotliInIsFinished(BrotliIn* r) {
682
+ size_t read_bytes;
683
+ return r->Read(0, &read_bytes) == NULL;
684
+ }
685
+
686
+ int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out) {
687
+ return BrotliCompressWithCustomDictionary(0, nullptr, params, in, out);
688
+ }
689
+
690
+ int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
691
+ BrotliParams params,
692
+ BrotliIn* in, BrotliOut* out) {
693
+ size_t in_bytes = 0;
694
+ size_t out_bytes = 0;
695
+ uint8_t* output;
696
+ bool final_block = false;
697
+ BrotliCompressor compressor(params);
698
+ if (dictsize != 0) compressor.BrotliSetCustomDictionary(dictsize, dict);
699
+ while (!final_block) {
700
+ in_bytes = CopyOneBlockToRingBuffer(in, &compressor);
701
+ final_block = in_bytes == 0 || BrotliInIsFinished(in);
702
+ out_bytes = 0;
703
+ if (!compressor.WriteBrotliData(final_block,
704
+ /* force_flush = */ false,
705
+ &out_bytes, &output)) {
706
+ return false;
707
+ }
708
+ if (out_bytes > 0 && !out->Write(output, out_bytes)) {
709
+ return false;
710
+ }
711
+ }
712
+ return true;
713
+ }
714
+
715
+ } // namespace brotli