brotli 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitmodules +1 -1
- data/.travis.yml +2 -1
- data/README.md +1 -1
- data/Rakefile +1 -1
- data/ext/brotli/brotli.cc +1 -1
- data/ext/brotli/extconf.rb +72 -14
- data/lib/brotli/version.rb +1 -1
- data/vendor/brotli/LICENSE +19 -202
- data/vendor/brotli/dec/Makefile +1 -1
- data/vendor/brotli/dec/bit_reader.c +23 -30
- data/vendor/brotli/dec/bit_reader.h +270 -141
- data/vendor/brotli/dec/context.h +3 -12
- data/vendor/brotli/dec/decode.c +1813 -1048
- data/vendor/brotli/dec/decode.h +22 -16
- data/vendor/brotli/dec/dictionary.c +9466 -0
- data/vendor/brotli/dec/dictionary.h +6 -9461
- data/vendor/brotli/dec/huffman.c +104 -71
- data/vendor/brotli/dec/huffman.h +19 -28
- data/vendor/brotli/dec/port.h +124 -32
- data/vendor/brotli/dec/prefix.h +4 -13
- data/vendor/brotli/dec/state.c +93 -56
- data/vendor/brotli/dec/state.h +124 -53
- data/vendor/brotli/dec/streams.c +14 -11
- data/vendor/brotli/dec/streams.h +6 -11
- data/vendor/brotli/dec/transform.h +2 -11
- data/vendor/brotli/dec/types.h +21 -19
- data/vendor/brotli/enc/Makefile +4 -1
- data/vendor/brotli/enc/backward_references.cc +87 -94
- data/vendor/brotli/enc/backward_references.h +8 -18
- data/vendor/brotli/enc/bit_cost.h +11 -19
- data/vendor/brotli/enc/block_splitter.cc +43 -48
- data/vendor/brotli/enc/block_splitter.h +7 -16
- data/vendor/brotli/enc/brotli_bit_stream.cc +48 -50
- data/vendor/brotli/enc/brotli_bit_stream.h +7 -16
- data/vendor/brotli/enc/cluster.h +24 -25
- data/vendor/brotli/enc/command.h +34 -41
- data/vendor/brotli/enc/context.h +11 -18
- data/vendor/brotli/enc/dictionary.cc +9466 -0
- data/vendor/brotli/enc/dictionary.h +20 -9464
- data/vendor/brotli/enc/dictionary_hash.h +7 -15
- data/vendor/brotli/enc/encode.cc +80 -148
- data/vendor/brotli/enc/encode.h +19 -29
- data/vendor/brotli/enc/encode_parallel.cc +35 -108
- data/vendor/brotli/enc/encode_parallel.h +7 -16
- data/vendor/brotli/enc/entropy_encode.cc +33 -42
- data/vendor/brotli/enc/entropy_encode.h +8 -16
- data/vendor/brotli/enc/fast_log.h +8 -15
- data/vendor/brotli/enc/find_match_length.h +7 -17
- data/vendor/brotli/enc/hash.h +130 -150
- data/vendor/brotli/enc/histogram.cc +7 -16
- data/vendor/brotli/enc/histogram.h +11 -17
- data/vendor/brotli/enc/literal_cost.cc +28 -35
- data/vendor/brotli/enc/literal_cost.h +9 -23
- data/vendor/brotli/enc/metablock.cc +18 -26
- data/vendor/brotli/enc/metablock.h +6 -14
- data/vendor/brotli/enc/port.h +14 -14
- data/vendor/brotli/enc/prefix.h +11 -18
- data/vendor/brotli/enc/ringbuffer.h +18 -27
- data/vendor/brotli/enc/static_dict.cc +7 -1
- data/vendor/brotli/enc/static_dict.h +7 -15
- data/vendor/brotli/enc/static_dict_lut.h +7 -15
- data/vendor/brotli/enc/streams.cc +15 -28
- data/vendor/brotli/enc/streams.h +27 -35
- data/vendor/brotli/enc/transform.h +9 -16
- data/vendor/brotli/enc/types.h +27 -0
- data/vendor/brotli/enc/utf8_util.cc +82 -0
- data/vendor/brotli/enc/utf8_util.h +25 -0
- data/vendor/brotli/enc/write_bits.h +11 -18
- metadata +7 -2
@@ -1,23 +1,15 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
1
|
+
/* Copyright 2015 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
15
7
|
// Hash table on the 4-byte prefixes of static dictionary words.
|
16
8
|
|
17
9
|
#ifndef BROTLI_ENC_DICTIONARY_HASH_H_
|
18
10
|
#define BROTLI_ENC_DICTIONARY_HASH_H_
|
19
11
|
|
20
|
-
#include
|
12
|
+
#include "./types.h"
|
21
13
|
|
22
14
|
namespace brotli {
|
23
15
|
|
data/vendor/brotli/enc/encode.cc
CHANGED
@@ -1,17 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
15
7
|
// Implementation of Brotli compressor.
|
16
8
|
|
17
9
|
#include "./encode.h"
|
@@ -31,78 +23,18 @@
|
|
31
23
|
#include "./fast_log.h"
|
32
24
|
#include "./hash.h"
|
33
25
|
#include "./histogram.h"
|
34
|
-
#include "./literal_cost.h"
|
35
26
|
#include "./prefix.h"
|
27
|
+
#include "./utf8_util.h"
|
36
28
|
#include "./write_bits.h"
|
37
29
|
|
38
30
|
namespace brotli {
|
39
31
|
|
40
|
-
static const double kMinUTF8Ratio = 0.75;
|
41
32
|
static const int kMinQualityForBlockSplit = 4;
|
42
33
|
static const int kMinQualityForContextModeling = 5;
|
43
34
|
static const int kMinQualityForOptimizeHistograms = 4;
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
if ((input[0] & 0x80) == 0) {
|
48
|
-
*symbol = input[0];
|
49
|
-
if (*symbol > 0) {
|
50
|
-
return 1;
|
51
|
-
}
|
52
|
-
}
|
53
|
-
// 2-byte UTF8
|
54
|
-
if (size > 1 &&
|
55
|
-
(input[0] & 0xe0) == 0xc0 &&
|
56
|
-
(input[1] & 0xc0) == 0x80) {
|
57
|
-
*symbol = (((input[0] & 0x1f) << 6) |
|
58
|
-
(input[1] & 0x3f));
|
59
|
-
if (*symbol > 0x7f) {
|
60
|
-
return 2;
|
61
|
-
}
|
62
|
-
}
|
63
|
-
// 3-byte UTF8
|
64
|
-
if (size > 2 &&
|
65
|
-
(input[0] & 0xf0) == 0xe0 &&
|
66
|
-
(input[1] & 0xc0) == 0x80 &&
|
67
|
-
(input[2] & 0xc0) == 0x80) {
|
68
|
-
*symbol = (((input[0] & 0x0f) << 12) |
|
69
|
-
((input[1] & 0x3f) << 6) |
|
70
|
-
(input[2] & 0x3f));
|
71
|
-
if (*symbol > 0x7ff) {
|
72
|
-
return 3;
|
73
|
-
}
|
74
|
-
}
|
75
|
-
// 4-byte UTF8
|
76
|
-
if (size > 3 &&
|
77
|
-
(input[0] & 0xf8) == 0xf0 &&
|
78
|
-
(input[1] & 0xc0) == 0x80 &&
|
79
|
-
(input[2] & 0xc0) == 0x80 &&
|
80
|
-
(input[3] & 0xc0) == 0x80) {
|
81
|
-
*symbol = (((input[0] & 0x07) << 18) |
|
82
|
-
((input[1] & 0x3f) << 12) |
|
83
|
-
((input[2] & 0x3f) << 6) |
|
84
|
-
(input[3] & 0x3f));
|
85
|
-
if (*symbol > 0xffff && *symbol <= 0x10ffff) {
|
86
|
-
return 4;
|
87
|
-
}
|
88
|
-
}
|
89
|
-
// Not UTF8, emit a special symbol above the UTF8-code space
|
90
|
-
*symbol = 0x110000 | input[0];
|
91
|
-
return 1;
|
92
|
-
}
|
93
|
-
|
94
|
-
// Returns true if at least min_fraction of the data is UTF8-encoded.
|
95
|
-
bool IsMostlyUTF8(const uint8_t* data, size_t length, double min_fraction) {
|
96
|
-
size_t size_utf8 = 0;
|
97
|
-
size_t pos = 0;
|
98
|
-
while (pos < length) {
|
99
|
-
int symbol;
|
100
|
-
int bytes_read = ParseAsUTF8(&symbol, data + pos, length - pos);
|
101
|
-
pos += bytes_read;
|
102
|
-
if (symbol < 0x110000) size_utf8 += bytes_read;
|
103
|
-
}
|
104
|
-
return size_utf8 > min_fraction * length;
|
105
|
-
}
|
35
|
+
// For quality 1 there is no block splitting, so we buffer at most this much
|
36
|
+
// literals and commands.
|
37
|
+
static const int kMaxNumDelayedSymbols = 0x2fff;
|
106
38
|
|
107
39
|
void RecomputeDistancePrefixes(Command* cmds,
|
108
40
|
size_t num_commands,
|
@@ -111,7 +43,7 @@ void RecomputeDistancePrefixes(Command* cmds,
|
|
111
43
|
if (num_direct_distance_codes == 0 && distance_postfix_bits == 0) {
|
112
44
|
return;
|
113
45
|
}
|
114
|
-
for (
|
46
|
+
for (size_t i = 0; i < num_commands; ++i) {
|
115
47
|
Command* cmd = &cmds[i];
|
116
48
|
if (cmd->copy_len_ > 0 && cmd->cmd_prefix_ >= 128) {
|
117
49
|
PrefixEncodeCopyDistance(cmd->DistanceCode(),
|
@@ -125,10 +57,11 @@ void RecomputeDistancePrefixes(Command* cmds,
|
|
125
57
|
|
126
58
|
uint8_t* BrotliCompressor::GetBrotliStorage(size_t size) {
|
127
59
|
if (storage_size_ < size) {
|
128
|
-
|
60
|
+
delete[] storage_;
|
61
|
+
storage_ = new uint8_t[size];
|
129
62
|
storage_size_ = size;
|
130
63
|
}
|
131
|
-
return
|
64
|
+
return storage_;
|
132
65
|
}
|
133
66
|
|
134
67
|
BrotliCompressor::BrotliCompressor(BrotliParams params)
|
@@ -142,7 +75,8 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
|
|
142
75
|
last_processed_pos_(0),
|
143
76
|
prev_byte_(0),
|
144
77
|
prev_byte2_(0),
|
145
|
-
storage_size_(0)
|
78
|
+
storage_size_(0),
|
79
|
+
storage_(0) {
|
146
80
|
// Sanitize params.
|
147
81
|
params_.quality = std::max(1, params_.quality);
|
148
82
|
if (params_.lgwin < kMinWindowBits) {
|
@@ -169,15 +103,10 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
|
|
169
103
|
// read_block_size_bits + 1 bits because the copy tail length needs to be
|
170
104
|
// smaller than ringbuffer size.
|
171
105
|
int ringbuffer_bits = std::max(params_.lgwin + 1, params_.lgblock + 1);
|
172
|
-
ringbuffer_
|
173
|
-
if (params_.quality > 9) {
|
174
|
-
literal_cost_mask_ = (1 << params_.lgblock) - 1;
|
175
|
-
literal_cost_.reset(new float[literal_cost_mask_ + 1]);
|
176
|
-
}
|
106
|
+
ringbuffer_ = new RingBuffer(ringbuffer_bits, params_.lgblock);
|
177
107
|
|
178
|
-
|
179
|
-
|
180
|
-
commands_.reset(new brotli::Command[cmd_buffer_size_]);
|
108
|
+
commands_ = 0;
|
109
|
+
cmd_alloc_size_ = 0;
|
181
110
|
|
182
111
|
// Initialize last byte with stream header.
|
183
112
|
if (params_.lgwin == 16) {
|
@@ -187,10 +116,10 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
|
|
187
116
|
last_byte_ = 1;
|
188
117
|
last_byte_bits_ = 7;
|
189
118
|
} else if (params_.lgwin > 17) {
|
190
|
-
last_byte_ = ((params_.lgwin - 17) << 1) | 1;
|
119
|
+
last_byte_ = static_cast<uint8_t>(((params_.lgwin - 17) << 1) | 1);
|
191
120
|
last_byte_bits_ = 4;
|
192
121
|
} else {
|
193
|
-
last_byte_ = ((params_.lgwin - 8) << 4) | 1;
|
122
|
+
last_byte_ = static_cast<uint8_t>(((params_.lgwin - 8) << 4) | 1);
|
194
123
|
last_byte_bits_ = 7;
|
195
124
|
}
|
196
125
|
|
@@ -209,6 +138,10 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
|
|
209
138
|
}
|
210
139
|
|
211
140
|
BrotliCompressor::~BrotliCompressor() {
|
141
|
+
delete[] storage_;
|
142
|
+
free(commands_);
|
143
|
+
delete ringbuffer_;
|
144
|
+
delete hashers_;
|
212
145
|
}
|
213
146
|
|
214
147
|
void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
|
@@ -216,10 +149,10 @@ void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
|
|
216
149
|
ringbuffer_->Write(input_buffer, input_size);
|
217
150
|
input_pos_ += input_size;
|
218
151
|
|
219
|
-
//
|
220
|
-
// depend on uninitialized data. This makes compression
|
221
|
-
// and it prevents uninitialized memory warnings in Valgrind.
|
222
|
-
// without erasing, the output would be valid (but nondeterministic).
|
152
|
+
// TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
|
153
|
+
// hashing not depend on uninitialized data. This makes compression
|
154
|
+
// deterministic and it prevents uninitialized memory warnings in Valgrind.
|
155
|
+
// Even without erasing, the output would be valid (but nondeterministic).
|
223
156
|
//
|
224
157
|
// Background information: The compressor stores short (at most 8 bytes)
|
225
158
|
// substrings of the input already read in a hash table, and detects
|
@@ -247,7 +180,7 @@ void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
|
|
247
180
|
// subsequent rounds data in the ringbuffer would be affected.
|
248
181
|
if (pos <= ringbuffer_->mask()) {
|
249
182
|
// This is the first time when the ring buffer is being written.
|
250
|
-
// We clear
|
183
|
+
// We clear 7 bytes just after the bytes that have been copied from
|
251
184
|
// the input buffer.
|
252
185
|
//
|
253
186
|
// The ringbuffer has a "tail" that holds a copy of the beginning,
|
@@ -256,9 +189,9 @@ void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
|
|
256
189
|
// in this tail (where index may be larger than mask), so that
|
257
190
|
// we have exactly defined behavior and don't read un-initialized
|
258
191
|
// memory. Due to performance reasons, hashing reads data using a
|
259
|
-
//
|
192
|
+
// LOAD64, which can go 7 bytes beyond the bytes written in the
|
260
193
|
// ringbuffer.
|
261
|
-
memset(ringbuffer_->start() + pos, 0,
|
194
|
+
memset(ringbuffer_->start() + pos, 0, 7);
|
262
195
|
}
|
263
196
|
}
|
264
197
|
|
@@ -288,27 +221,21 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
|
|
288
221
|
return false;
|
289
222
|
}
|
290
223
|
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
} else {
|
301
|
-
EstimateBitCostsForLiterals(last_processed_pos_, bytes, mask,
|
302
|
-
literal_cost_mask_,
|
303
|
-
data, literal_cost_.get());
|
304
|
-
}
|
224
|
+
// Theoretical max number of commands is 1 per 2 bytes.
|
225
|
+
size_t newsize = num_commands_ + bytes / 2 + 1;
|
226
|
+
if (newsize > cmd_alloc_size_) {
|
227
|
+
// Reserve a bit more memory to allow merging with a next block
|
228
|
+
// without realloc: that would impact speed.
|
229
|
+
newsize += bytes / 4;
|
230
|
+
cmd_alloc_size_ = newsize;
|
231
|
+
commands_ =
|
232
|
+
static_cast<Command*>(realloc(commands_, sizeof(Command) * newsize));
|
305
233
|
}
|
234
|
+
|
306
235
|
CreateBackwardReferences(bytes, last_processed_pos_, data, mask,
|
307
|
-
literal_cost_.get(),
|
308
|
-
literal_cost_mask_,
|
309
236
|
max_backward_distance_,
|
310
237
|
params_.quality,
|
311
|
-
hashers_
|
238
|
+
hashers_,
|
312
239
|
hash_type_,
|
313
240
|
dist_cache_,
|
314
241
|
&last_insert_len_,
|
@@ -316,16 +243,12 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
|
|
316
243
|
&num_commands_,
|
317
244
|
&num_literals_);
|
318
245
|
|
319
|
-
|
320
|
-
// literals and commands.
|
321
|
-
static const int kMaxNumDelayedSymbols = 0x2fff;
|
322
|
-
int max_length = std::min<int>(mask + 1, 1 << kMaxInputBlockBits);
|
246
|
+
size_t max_length = std::min<size_t>(mask + 1, 1u << kMaxInputBlockBits);
|
323
247
|
if (!is_last && !force_flush &&
|
324
248
|
(params_.quality >= kMinQualityForBlockSplit ||
|
325
249
|
(num_literals_ + num_commands_ < kMaxNumDelayedSymbols)) &&
|
326
|
-
num_commands_ + (input_block_size() >> 1) < cmd_buffer_size_ &&
|
327
250
|
input_pos_ + input_block_size() <= last_flush_pos_ + max_length) {
|
328
|
-
// Everything will happen later.
|
251
|
+
// Merge with next input block. Everything will happen later.
|
329
252
|
last_processed_pos_ = input_pos_;
|
330
253
|
*out_size = 0;
|
331
254
|
return true;
|
@@ -339,7 +262,7 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
|
|
339
262
|
last_insert_len_ = 0;
|
340
263
|
}
|
341
264
|
|
342
|
-
return WriteMetaBlockInternal(is_last,
|
265
|
+
return WriteMetaBlockInternal(is_last, out_size, output);
|
343
266
|
}
|
344
267
|
|
345
268
|
// Decide about the context map based on the ability of the prediction
|
@@ -373,6 +296,8 @@ void ChooseContextMap(int quality,
|
|
373
296
|
for (int k = 0; k < 3; ++k) {
|
374
297
|
entropy3 += ShannonEntropy(bigram_histo + 3 * k, 3, &dummy);
|
375
298
|
}
|
299
|
+
|
300
|
+
assert(total != 0);
|
376
301
|
entropy1 *= (1.0 / total);
|
377
302
|
entropy2 *= (1.0 / total);
|
378
303
|
entropy3 *= (1.0 / total);
|
@@ -423,7 +348,7 @@ void DecideOverLiteralContextModeling(const uint8_t* input,
|
|
423
348
|
// intervals.
|
424
349
|
const size_t end_pos = start_pos + length;
|
425
350
|
int bigram_prefix_histo[9] = { 0 };
|
426
|
-
for (; start_pos + 64
|
351
|
+
for (; start_pos + 64 <= end_pos; start_pos += 4096) {
|
427
352
|
static const int lut[4] = { 0, 0, 1, 2 };
|
428
353
|
const size_t stride_end_pos = start_pos + 64;
|
429
354
|
int prev = lut[input[start_pos & mask] >> 6] * 3;
|
@@ -439,7 +364,6 @@ void DecideOverLiteralContextModeling(const uint8_t* input,
|
|
439
364
|
}
|
440
365
|
|
441
366
|
bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
|
442
|
-
const bool utf8_mode,
|
443
367
|
size_t* out_size,
|
444
368
|
uint8_t** output) {
|
445
369
|
const size_t bytes = input_pos_ - last_flush_pos_;
|
@@ -452,15 +376,16 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
|
|
452
376
|
|
453
377
|
bool uncompressed = false;
|
454
378
|
if (num_commands_ < (bytes >> 8) + 2) {
|
455
|
-
if (num_literals_ > 0.99 * bytes) {
|
379
|
+
if (num_literals_ > 0.99 * static_cast<double>(bytes)) {
|
456
380
|
int literal_histo[256] = { 0 };
|
457
381
|
static const int kSampleRate = 13;
|
458
382
|
static const double kMinEntropy = 7.92;
|
459
|
-
|
460
|
-
|
383
|
+
const double bit_cost_threshold =
|
384
|
+
static_cast<double>(bytes) * kMinEntropy / kSampleRate;
|
385
|
+
for (size_t i = last_flush_pos_; i < input_pos_; i += kSampleRate) {
|
461
386
|
++literal_histo[data[i & mask]];
|
462
387
|
}
|
463
|
-
if (BitsEntropy(literal_histo, 256) >
|
388
|
+
if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
|
464
389
|
uncompressed = true;
|
465
390
|
}
|
466
391
|
}
|
@@ -487,21 +412,21 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
|
|
487
412
|
if (params_.quality > 9 && params_.mode == BrotliParams::MODE_FONT) {
|
488
413
|
num_direct_distance_codes = 12;
|
489
414
|
distance_postfix_bits = 1;
|
490
|
-
RecomputeDistancePrefixes(commands_
|
415
|
+
RecomputeDistancePrefixes(commands_,
|
491
416
|
num_commands_,
|
492
417
|
num_direct_distance_codes,
|
493
418
|
distance_postfix_bits);
|
494
419
|
}
|
495
420
|
if (params_.quality < kMinQualityForBlockSplit) {
|
496
421
|
if (!StoreMetaBlockTrivial(data, last_flush_pos_, bytes, mask, is_last,
|
497
|
-
commands_
|
422
|
+
commands_, num_commands_,
|
498
423
|
&storage_ix,
|
499
424
|
&storage[0])) {
|
500
425
|
return false;
|
501
426
|
}
|
502
427
|
} else {
|
503
428
|
MetaBlockSplit mb;
|
504
|
-
int literal_context_mode =
|
429
|
+
int literal_context_mode = CONTEXT_UTF8;
|
505
430
|
if (params_.quality <= 9) {
|
506
431
|
int num_literal_contexts = 1;
|
507
432
|
const int* literal_context_map = NULL;
|
@@ -512,7 +437,7 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
|
|
512
437
|
&literal_context_map);
|
513
438
|
if (literal_context_map == NULL) {
|
514
439
|
BuildMetaBlockGreedy(data, last_flush_pos_, mask,
|
515
|
-
commands_
|
440
|
+
commands_, num_commands_,
|
516
441
|
&mb);
|
517
442
|
} else {
|
518
443
|
BuildMetaBlockGreedyWithContexts(data, last_flush_pos_, mask,
|
@@ -520,13 +445,16 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
|
|
520
445
|
literal_context_mode,
|
521
446
|
num_literal_contexts,
|
522
447
|
literal_context_map,
|
523
|
-
commands_
|
448
|
+
commands_, num_commands_,
|
524
449
|
&mb);
|
525
450
|
}
|
526
451
|
} else {
|
452
|
+
if (!IsMostlyUTF8(data, last_flush_pos_, mask, bytes, kMinUTF8Ratio)) {
|
453
|
+
literal_context_mode = CONTEXT_SIGNED;
|
454
|
+
}
|
527
455
|
BuildMetaBlock(data, last_flush_pos_, mask,
|
528
456
|
prev_byte_, prev_byte2_,
|
529
|
-
commands_
|
457
|
+
commands_, num_commands_,
|
530
458
|
literal_context_mode,
|
531
459
|
&mb);
|
532
460
|
}
|
@@ -541,14 +469,14 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
|
|
541
469
|
num_direct_distance_codes,
|
542
470
|
distance_postfix_bits,
|
543
471
|
literal_context_mode,
|
544
|
-
commands_
|
472
|
+
commands_, num_commands_,
|
545
473
|
mb,
|
546
474
|
&storage_ix,
|
547
475
|
&storage[0])) {
|
548
476
|
return false;
|
549
477
|
}
|
550
478
|
}
|
551
|
-
if (bytes + 4 < (storage_ix >> 3)) {
|
479
|
+
if (bytes + 4 < static_cast<size_t>(storage_ix >> 3)) {
|
552
480
|
// Restore the distance cache and last byte.
|
553
481
|
memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
|
554
482
|
storage[0] = last_byte_;
|
@@ -602,20 +530,24 @@ bool BrotliCompressor::WriteMetadata(const size_t input_size,
|
|
602
530
|
if (input_size > (1 << 24) || input_size + 6 > *encoded_size) {
|
603
531
|
return false;
|
604
532
|
}
|
533
|
+
uint64_t hdr_buffer_data[2];
|
534
|
+
uint8_t* hdr_buffer = reinterpret_cast<uint8_t*>(&hdr_buffer_data[0]);
|
605
535
|
int storage_ix = last_byte_bits_;
|
606
|
-
|
607
|
-
WriteBits(1, 0, &storage_ix,
|
608
|
-
WriteBits(2, 3, &storage_ix,
|
609
|
-
WriteBits(1, 0, &storage_ix,
|
536
|
+
hdr_buffer[0] = last_byte_;
|
537
|
+
WriteBits(1, 0, &storage_ix, hdr_buffer);
|
538
|
+
WriteBits(2, 3, &storage_ix, hdr_buffer);
|
539
|
+
WriteBits(1, 0, &storage_ix, hdr_buffer);
|
610
540
|
if (input_size == 0) {
|
611
|
-
WriteBits(2, 0, &storage_ix,
|
541
|
+
WriteBits(2, 0, &storage_ix, hdr_buffer);
|
612
542
|
*encoded_size = (storage_ix + 7) >> 3;
|
543
|
+
memcpy(encoded_buffer, hdr_buffer, *encoded_size);
|
613
544
|
} else {
|
614
|
-
|
615
|
-
|
616
|
-
WriteBits(2, nbytes, &storage_ix,
|
617
|
-
WriteBits(8 * nbytes, input_size - 1, &storage_ix,
|
545
|
+
int nbits = Log2Floor(static_cast<uint32_t>(input_size) - 1) + 1;
|
546
|
+
int nbytes = (nbits + 7) / 8;
|
547
|
+
WriteBits(2, nbytes, &storage_ix, hdr_buffer);
|
548
|
+
WriteBits(8 * nbytes, input_size - 1, &storage_ix, hdr_buffer);
|
618
549
|
size_t hdr_size = (storage_ix + 7) >> 3;
|
550
|
+
memcpy(encoded_buffer, hdr_buffer, hdr_size);
|
619
551
|
memcpy(&encoded_buffer[hdr_size], input_buffer, input_size);
|
620
552
|
*encoded_size = hdr_size + input_size;
|
621
553
|
}
|
@@ -641,7 +573,6 @@ int BrotliCompressBuffer(BrotliParams params,
|
|
641
573
|
// Output buffer needs at least one byte.
|
642
574
|
return 0;
|
643
575
|
}
|
644
|
-
BrotliCompressor compressor(params);
|
645
576
|
BrotliMemIn in(input_buffer, input_size);
|
646
577
|
BrotliMemOut out(encoded_buffer, *encoded_size);
|
647
578
|
if (!BrotliCompress(params, &in, &out)) {
|
@@ -684,7 +615,7 @@ bool BrotliInIsFinished(BrotliIn* r) {
|
|
684
615
|
}
|
685
616
|
|
686
617
|
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out) {
|
687
|
-
return BrotliCompressWithCustomDictionary(0,
|
618
|
+
return BrotliCompressWithCustomDictionary(0, 0, params, in, out);
|
688
619
|
}
|
689
620
|
|
690
621
|
int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
|
@@ -712,4 +643,5 @@ int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
|
|
712
643
|
return true;
|
713
644
|
}
|
714
645
|
|
646
|
+
|
715
647
|
} // namespace brotli
|
data/vendor/brotli/enc/encode.h
CHANGED
@@ -1,24 +1,14 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
15
7
|
// API for Brotli compression
|
16
8
|
|
17
9
|
#ifndef BROTLI_ENC_ENCODE_H_
|
18
10
|
#define BROTLI_ENC_ENCODE_H_
|
19
11
|
|
20
|
-
#include <stddef.h>
|
21
|
-
#include <stdint.h>
|
22
12
|
#include <string>
|
23
13
|
#include <vector>
|
24
14
|
#include "./command.h"
|
@@ -26,6 +16,7 @@
|
|
26
16
|
#include "./ringbuffer.h"
|
27
17
|
#include "./static_dict.h"
|
28
18
|
#include "./streams.h"
|
19
|
+
#include "./types.h"
|
29
20
|
|
30
21
|
namespace brotli {
|
31
22
|
|
@@ -52,7 +43,7 @@ struct BrotliParams {
|
|
52
43
|
// Compression mode for UTF-8 format text input.
|
53
44
|
MODE_TEXT = 1,
|
54
45
|
// Compression mode used in WOFF 2.0.
|
55
|
-
MODE_FONT = 2
|
46
|
+
MODE_FONT = 2
|
56
47
|
};
|
57
48
|
Mode mode;
|
58
49
|
|
@@ -80,12 +71,13 @@ class BrotliCompressor {
|
|
80
71
|
~BrotliCompressor();
|
81
72
|
|
82
73
|
// The maximum input size that can be processed at once.
|
83
|
-
size_t input_block_size() const { return 1 << params_.lgblock; }
|
74
|
+
size_t input_block_size() const { return size_t(1) << params_.lgblock; }
|
84
75
|
|
85
76
|
// Encodes the data in input_buffer as a meta-block and writes it to
|
86
77
|
// encoded_buffer (*encoded_size should be set to the size of
|
87
78
|
// encoded_buffer) and sets *encoded_size to the number of bytes that
|
88
|
-
// was written.
|
79
|
+
// was written. The input_size must be <= input_block_size().
|
80
|
+
// Returns false if there was an error and true otherwise.
|
89
81
|
bool WriteMetaBlock(const size_t input_size,
|
90
82
|
const uint8_t* input_buffer,
|
91
83
|
const bool is_last,
|
@@ -143,21 +135,18 @@ class BrotliCompressor {
|
|
143
135
|
uint8_t* GetBrotliStorage(size_t size);
|
144
136
|
|
145
137
|
bool WriteMetaBlockInternal(const bool is_last,
|
146
|
-
const bool utf8_mode,
|
147
138
|
size_t* out_size,
|
148
139
|
uint8_t** output);
|
149
140
|
|
150
141
|
BrotliParams params_;
|
151
142
|
int max_backward_distance_;
|
152
|
-
|
143
|
+
Hashers* hashers_;
|
153
144
|
int hash_type_;
|
154
145
|
size_t input_pos_;
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
size_t
|
159
|
-
std::unique_ptr<Command[]> commands_;
|
160
|
-
int num_commands_;
|
146
|
+
RingBuffer* ringbuffer_;
|
147
|
+
size_t cmd_alloc_size_;
|
148
|
+
Command* commands_;
|
149
|
+
size_t num_commands_;
|
161
150
|
int num_literals_;
|
162
151
|
int last_insert_len_;
|
163
152
|
size_t last_flush_pos_;
|
@@ -168,8 +157,8 @@ class BrotliCompressor {
|
|
168
157
|
uint8_t last_byte_bits_;
|
169
158
|
uint8_t prev_byte_;
|
170
159
|
uint8_t prev_byte2_;
|
171
|
-
|
172
|
-
|
160
|
+
size_t storage_size_;
|
161
|
+
uint8_t* storage_;
|
173
162
|
};
|
174
163
|
|
175
164
|
// Compresses the data in input_buffer into encoded_buffer, and sets
|
@@ -191,6 +180,7 @@ int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
|
|
191
180
|
BrotliParams params,
|
192
181
|
BrotliIn* in, BrotliOut* out);
|
193
182
|
|
183
|
+
|
194
184
|
} // namespace brotli
|
195
185
|
|
196
186
|
#endif // BROTLI_ENC_ENCODE_H_
|