brotli 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.travis.yml +11 -3
- data/Gemfile +2 -0
- data/ext/brotli/brotli.c +279 -0
- data/ext/brotli/brotli.h +2 -0
- data/ext/brotli/buffer.c +95 -0
- data/ext/brotli/buffer.h +19 -0
- data/ext/brotli/extconf.rb +21 -81
- data/lib/brotli/version.rb +1 -1
- data/vendor/brotli/dec/bit_reader.c +5 -5
- data/vendor/brotli/dec/bit_reader.h +15 -15
- data/vendor/brotli/dec/context.h +1 -1
- data/vendor/brotli/dec/decode.c +433 -348
- data/vendor/brotli/dec/decode.h +74 -48
- data/vendor/brotli/dec/huffman.c +5 -4
- data/vendor/brotli/dec/huffman.h +4 -4
- data/vendor/brotli/dec/port.h +2 -95
- data/vendor/brotli/dec/prefix.h +5 -3
- data/vendor/brotli/dec/state.c +15 -27
- data/vendor/brotli/dec/state.h +21 -17
- data/vendor/brotli/dec/transform.h +1 -1
- data/vendor/brotli/enc/backward_references.c +892 -0
- data/vendor/brotli/enc/backward_references.h +85 -102
- data/vendor/brotli/enc/backward_references_inc.h +147 -0
- data/vendor/brotli/enc/bit_cost.c +35 -0
- data/vendor/brotli/enc/bit_cost.h +23 -121
- data/vendor/brotli/enc/bit_cost_inc.h +127 -0
- data/vendor/brotli/enc/block_encoder_inc.h +33 -0
- data/vendor/brotli/enc/block_splitter.c +197 -0
- data/vendor/brotli/enc/block_splitter.h +40 -50
- data/vendor/brotli/enc/block_splitter_inc.h +432 -0
- data/vendor/brotli/enc/brotli_bit_stream.c +1334 -0
- data/vendor/brotli/enc/brotli_bit_stream.h +95 -167
- data/vendor/brotli/enc/cluster.c +56 -0
- data/vendor/brotli/enc/cluster.h +23 -305
- data/vendor/brotli/enc/cluster_inc.h +315 -0
- data/vendor/brotli/enc/command.h +83 -76
- data/vendor/brotli/enc/compress_fragment.c +747 -0
- data/vendor/brotli/enc/compress_fragment.h +48 -37
- data/vendor/brotli/enc/compress_fragment_two_pass.c +557 -0
- data/vendor/brotli/enc/compress_fragment_two_pass.h +37 -26
- data/vendor/brotli/enc/compressor.cc +139 -0
- data/vendor/brotli/enc/compressor.h +146 -0
- data/vendor/brotli/enc/context.h +102 -96
- data/vendor/brotli/enc/dictionary_hash.h +9 -5
- data/vendor/brotli/enc/encode.c +1562 -0
- data/vendor/brotli/enc/encode.h +211 -199
- data/vendor/brotli/enc/encode_parallel.cc +161 -151
- data/vendor/brotli/enc/encode_parallel.h +7 -8
- data/vendor/brotli/enc/entropy_encode.c +501 -0
- data/vendor/brotli/enc/entropy_encode.h +107 -89
- data/vendor/brotli/enc/entropy_encode_static.h +29 -62
- data/vendor/brotli/enc/fast_log.h +26 -20
- data/vendor/brotli/enc/find_match_length.h +23 -20
- data/vendor/brotli/enc/hash.h +614 -871
- data/vendor/brotli/enc/hash_forgetful_chain_inc.h +249 -0
- data/vendor/brotli/enc/hash_longest_match_inc.h +241 -0
- data/vendor/brotli/enc/hash_longest_match_quickly_inc.h +230 -0
- data/vendor/brotli/enc/histogram.c +95 -0
- data/vendor/brotli/enc/histogram.h +49 -83
- data/vendor/brotli/enc/histogram_inc.h +51 -0
- data/vendor/brotli/enc/literal_cost.c +178 -0
- data/vendor/brotli/enc/literal_cost.h +16 -10
- data/vendor/brotli/enc/memory.c +181 -0
- data/vendor/brotli/enc/memory.h +62 -0
- data/vendor/brotli/enc/metablock.c +515 -0
- data/vendor/brotli/enc/metablock.h +87 -57
- data/vendor/brotli/enc/metablock_inc.h +183 -0
- data/vendor/brotli/enc/port.h +73 -47
- data/vendor/brotli/enc/prefix.h +34 -61
- data/vendor/brotli/enc/quality.h +130 -0
- data/vendor/brotli/enc/ringbuffer.h +137 -122
- data/vendor/brotli/enc/{static_dict.cc → static_dict.c} +162 -139
- data/vendor/brotli/enc/static_dict.h +23 -18
- data/vendor/brotli/enc/static_dict_lut.h +11223 -12037
- data/vendor/brotli/enc/streams.cc +7 -7
- data/vendor/brotli/enc/streams.h +32 -32
- data/vendor/brotli/enc/{utf8_util.cc → utf8_util.c} +22 -20
- data/vendor/brotli/enc/utf8_util.h +16 -9
- data/vendor/brotli/enc/write_bits.h +49 -43
- metadata +34 -25
- data/ext/brotli/brotli.cc +0 -181
- data/vendor/brotli/dec/Makefile +0 -12
- data/vendor/brotli/dec/dictionary.c +0 -9466
- data/vendor/brotli/dec/dictionary.h +0 -38
- data/vendor/brotli/dec/types.h +0 -38
- data/vendor/brotli/enc/Makefile +0 -14
- data/vendor/brotli/enc/backward_references.cc +0 -858
- data/vendor/brotli/enc/block_splitter.cc +0 -505
- data/vendor/brotli/enc/brotli_bit_stream.cc +0 -1181
- data/vendor/brotli/enc/compress_fragment.cc +0 -701
- data/vendor/brotli/enc/compress_fragment_two_pass.cc +0 -524
- data/vendor/brotli/enc/dictionary.cc +0 -9466
- data/vendor/brotli/enc/dictionary.h +0 -41
- data/vendor/brotli/enc/encode.cc +0 -1180
- data/vendor/brotli/enc/entropy_encode.cc +0 -480
- data/vendor/brotli/enc/histogram.cc +0 -67
- data/vendor/brotli/enc/literal_cost.cc +0 -165
- data/vendor/brotli/enc/metablock.cc +0 -539
- data/vendor/brotli/enc/transform.h +0 -248
- data/vendor/brotli/enc/types.h +0 -29
@@ -0,0 +1,230 @@
|
|
1
|
+
/* NOLINT(build/header_guard) */
|
2
|
+
/* Copyright 2010 Google Inc. All Rights Reserved.
|
3
|
+
|
4
|
+
Distributed under MIT license.
|
5
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
6
|
+
*/
|
7
|
+
|
8
|
+
/* template parameters: FN, BUCKET_BITS, BUCKET_SWEEP, USE_DICTIONARY */
|
9
|
+
|
10
|
+
#define HashLongestMatchQuickly HASHER()
|
11
|
+
|
12
|
+
#define BUCKET_SIZE (1 << BUCKET_BITS)
|
13
|
+
|
14
|
+
#define HASH_MAP_SIZE (4 << BUCKET_BITS)
|
15
|
+
|
16
|
+
/* Returns the constant 8. Presumably the number of input bytes one hash
   computation reads (FN(HashBytes) performs a 64-bit load) -- confirm
   against callers. */
static BROTLI_INLINE size_t FN(HashTypeLength)(void) {
  const size_t kHashTypeLength = 8;
  return kHashTypeLength;
}
|
17
|
+
/* Returns the constant 8. Presumably the number of bytes that must be
   readable at a position before it can be stored -- confirm against
   callers. */
static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
  const size_t kStoreLookahead = 8;
  return kStoreLookahead;
}
|
18
|
+
|
19
|
+
/* HashBytes is the function that chooses the bucket to place
|
20
|
+
the address in. The HashLongestMatch and HashLongestMatchQuickly
|
21
|
+
classes have separate, different implementations of hashing. */
|
22
|
+
static uint32_t FN(HashBytes)(const uint8_t *data) {
|
23
|
+
/* Computing a hash based on 5 bytes works much better for
|
24
|
+
qualities 1 and 3, where the next hash value is likely to replace */
|
25
|
+
uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32;
|
26
|
+
/* The higher bits contain more mixture from the multiplication,
|
27
|
+
so we take our results from there. */
|
28
|
+
return (uint32_t)(h >> (64 - BUCKET_BITS));
|
29
|
+
}
|
30
|
+
|
31
|
+
/* A (forgetful) hash table to the data seen by the compressor, to
|
32
|
+
help create backward references to previous data.
|
33
|
+
|
34
|
+
This is a hash map of fixed size (BUCKET_SIZE). Starting from the
|
35
|
+
given index, BUCKET_SWEEP buckets are used to store values of a key. */
|
36
|
+
typedef struct HashLongestMatchQuickly {
|
37
|
+
uint32_t buckets_[BUCKET_SIZE + BUCKET_SWEEP];
|
38
|
+
/* True if buckets_ array needs to be initialized. */
|
39
|
+
BROTLI_BOOL is_dirty_;
|
40
|
+
DictionarySearchStatictics dict_search_stats_;
|
41
|
+
} HashLongestMatchQuickly;
|
42
|
+
|
43
|
+
/* Flags the bucket array as needing initialization and resets the
   dictionary search statistics. The buckets themselves are not touched. */
static void FN(Reset)(HashLongestMatchQuickly* self) {
  DictionarySearchStaticticsReset(&self->dict_search_stats_);
  self->is_dirty_ = BROTLI_TRUE;
}
|
47
|
+
|
48
|
+
/* Zeroes the whole bucket array if it is flagged dirty; otherwise does
   nothing. */
static void FN(InitEmpty)(HashLongestMatchQuickly* self) {
  if (!self->is_dirty_) {
    return;
  }
  /* Clearing is not strictly required for correctness, but leftover random
     contents would make the compression result stochastic (still correct):
     the search would stumble over accidentally good backward references. */
  memset(self->buckets_, 0, sizeof(self->buckets_));
  self->is_dirty_ = BROTLI_FALSE;
}
|
58
|
+
|
59
|
+
/* Clears only the buckets that the first |num| positions of |data| hash
   to: for each position the BUCKET_SWEEP slots starting at its key are
   zeroed. The dirty flag is cleared unless |num| is zero. */
static void FN(InitForData)(HashLongestMatchQuickly* self, const uint8_t* data,
                            size_t num) {
  const uint8_t* cursor = data;
  size_t remaining = num;
  if (num == 0) {
    return;
  }
  while (remaining != 0) {
    uint32_t* const slots = &self->buckets_[FN(HashBytes)(cursor)];
    memset(slots, 0, BUCKET_SWEEP * sizeof(self->buckets_[0]));
    ++cursor;
    --remaining;
  }
  self->is_dirty_ = BROTLI_FALSE;
}
|
70
|
+
|
71
|
+
static void FN(Init)(
|
72
|
+
MemoryManager* m, HashLongestMatchQuickly* self, const uint8_t* data,
|
73
|
+
const BrotliEncoderParams* params, size_t position, size_t bytes,
|
74
|
+
BROTLI_BOOL is_last) {
|
75
|
+
/* Choose which init method is faster.
|
76
|
+
Init() is about 100 times faster than InitForData(). */
|
77
|
+
const size_t kMaxBytesForPartialHashInit = HASH_MAP_SIZE >> 7;
|
78
|
+
BROTLI_UNUSED(m);
|
79
|
+
BROTLI_UNUSED(params);
|
80
|
+
if (position == 0 && is_last && bytes <= kMaxBytesForPartialHashInit) {
|
81
|
+
FN(InitForData)(self, data, bytes);
|
82
|
+
} else {
|
83
|
+
FN(InitEmpty)(self);
|
84
|
+
}
|
85
|
+
}
|
86
|
+
|
87
|
+
/* Look at 5 bytes at &data[ix & mask].
|
88
|
+
Compute a hash from these, and store the value somewhere within
|
89
|
+
[ix .. ix+3]. */
|
90
|
+
static BROTLI_INLINE void FN(Store)(HashLongestMatchQuickly* self,
|
91
|
+
const uint8_t *data, const size_t mask, const size_t ix) {
|
92
|
+
const uint32_t key = FN(HashBytes)(&data[ix & mask]);
|
93
|
+
/* Wiggle the value with the bucket sweep range. */
|
94
|
+
const uint32_t off = (ix >> 3) % BUCKET_SWEEP;
|
95
|
+
self->buckets_[key + off] = (uint32_t)ix;
|
96
|
+
}
|
97
|
+
|
98
|
+
/* Stores every position in the half-open range [ix_start, ix_end), in
   increasing order. */
static BROTLI_INLINE void FN(StoreRange)(HashLongestMatchQuickly* self,
    const uint8_t *data, const size_t mask, const size_t ix_start,
    const size_t ix_end) {
  size_t ix = ix_start;
  while (ix < ix_end) {
    FN(Store)(self, data, mask, ix);
    ++ix;
  }
}
|
106
|
+
|
107
|
+
static BROTLI_INLINE void FN(StitchToPreviousBlock)(
|
108
|
+
HashLongestMatchQuickly* self, size_t num_bytes, size_t position,
|
109
|
+
const uint8_t* ringbuffer, size_t ringbuffer_mask) {
|
110
|
+
if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
|
111
|
+
/* Prepare the hashes for three last bytes of the last write.
|
112
|
+
These could not be calculated before, since they require knowledge
|
113
|
+
of both the previous and the current block. */
|
114
|
+
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
|
115
|
+
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
|
116
|
+
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
|
117
|
+
}
|
118
|
+
}
|
119
|
+
|
120
|
+
/* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
|
121
|
+
up to the length of max_length and stores the position cur_ix in the
|
122
|
+
hash table.
|
123
|
+
|
124
|
+
Does not look for matches longer than max_length.
|
125
|
+
Does not look for matches further away than max_backward.
|
126
|
+
Writes the best match into |out|.
|
127
|
+
Returns true if match is found, otherwise false. */
|
128
|
+
static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(
|
129
|
+
HashLongestMatchQuickly* self, const uint8_t* BROTLI_RESTRICT data,
|
130
|
+
const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
|
131
|
+
const size_t cur_ix, const size_t max_length, const size_t max_backward,
|
132
|
+
HasherSearchResult* BROTLI_RESTRICT out) {
|
133
|
+
const size_t best_len_in = out->len;
|
134
|
+
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
135
|
+
const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
|
136
|
+
int compare_char = data[cur_ix_masked + best_len_in];
|
137
|
+
score_t best_score = out->score;
|
138
|
+
size_t best_len = best_len_in;
|
139
|
+
size_t cached_backward = (size_t)distance_cache[0];
|
140
|
+
size_t prev_ix = cur_ix - cached_backward;
|
141
|
+
BROTLI_BOOL is_match_found = BROTLI_FALSE;
|
142
|
+
out->len_x_code = 0;
|
143
|
+
if (prev_ix < cur_ix) {
|
144
|
+
prev_ix &= (uint32_t)ring_buffer_mask;
|
145
|
+
if (compare_char == data[prev_ix + best_len]) {
|
146
|
+
size_t len = FindMatchLengthWithLimit(&data[prev_ix],
|
147
|
+
&data[cur_ix_masked],
|
148
|
+
max_length);
|
149
|
+
if (len >= 4) {
|
150
|
+
best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
|
151
|
+
best_len = len;
|
152
|
+
out->len = len;
|
153
|
+
out->distance = cached_backward;
|
154
|
+
out->score = best_score;
|
155
|
+
compare_char = data[cur_ix_masked + best_len];
|
156
|
+
if (BUCKET_SWEEP == 1) {
|
157
|
+
self->buckets_[key] = (uint32_t)cur_ix;
|
158
|
+
return BROTLI_TRUE;
|
159
|
+
} else {
|
160
|
+
is_match_found = BROTLI_TRUE;
|
161
|
+
}
|
162
|
+
}
|
163
|
+
}
|
164
|
+
}
|
165
|
+
if (BUCKET_SWEEP == 1) {
|
166
|
+
size_t backward;
|
167
|
+
size_t len;
|
168
|
+
/* Only one to look for, don't bother to prepare for a loop. */
|
169
|
+
prev_ix = self->buckets_[key];
|
170
|
+
self->buckets_[key] = (uint32_t)cur_ix;
|
171
|
+
backward = cur_ix - prev_ix;
|
172
|
+
prev_ix &= (uint32_t)ring_buffer_mask;
|
173
|
+
if (compare_char != data[prev_ix + best_len_in]) {
|
174
|
+
return BROTLI_FALSE;
|
175
|
+
}
|
176
|
+
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
177
|
+
return BROTLI_FALSE;
|
178
|
+
}
|
179
|
+
len = FindMatchLengthWithLimit(&data[prev_ix],
|
180
|
+
&data[cur_ix_masked],
|
181
|
+
max_length);
|
182
|
+
if (len >= 4) {
|
183
|
+
out->len = len;
|
184
|
+
out->distance = backward;
|
185
|
+
out->score = BackwardReferenceScore(len, backward);
|
186
|
+
return BROTLI_TRUE;
|
187
|
+
}
|
188
|
+
} else {
|
189
|
+
uint32_t *bucket = self->buckets_ + key;
|
190
|
+
int i;
|
191
|
+
prev_ix = *bucket++;
|
192
|
+
for (i = 0; i < BUCKET_SWEEP; ++i, prev_ix = *bucket++) {
|
193
|
+
const size_t backward = cur_ix - prev_ix;
|
194
|
+
size_t len;
|
195
|
+
prev_ix &= (uint32_t)ring_buffer_mask;
|
196
|
+
if (compare_char != data[prev_ix + best_len]) {
|
197
|
+
continue;
|
198
|
+
}
|
199
|
+
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
200
|
+
continue;
|
201
|
+
}
|
202
|
+
len = FindMatchLengthWithLimit(&data[prev_ix],
|
203
|
+
&data[cur_ix_masked],
|
204
|
+
max_length);
|
205
|
+
if (len >= 4) {
|
206
|
+
const score_t score = BackwardReferenceScore(len, backward);
|
207
|
+
if (best_score < score) {
|
208
|
+
best_score = score;
|
209
|
+
best_len = len;
|
210
|
+
out->len = best_len;
|
211
|
+
out->distance = backward;
|
212
|
+
out->score = score;
|
213
|
+
compare_char = data[cur_ix_masked + best_len];
|
214
|
+
is_match_found = BROTLI_TRUE;
|
215
|
+
}
|
216
|
+
}
|
217
|
+
}
|
218
|
+
}
|
219
|
+
if (USE_DICTIONARY && !is_match_found) {
|
220
|
+
is_match_found = SearchInStaticDictionary(&self->dict_search_stats_,
|
221
|
+
&data[cur_ix_masked], max_length, max_backward, out, BROTLI_TRUE);
|
222
|
+
}
|
223
|
+
self->buckets_[key + ((cur_ix >> 3) % BUCKET_SWEEP)] = (uint32_t)cur_ix;
|
224
|
+
return is_match_found;
|
225
|
+
}
|
226
|
+
|
227
|
+
#undef HASH_MAP_SIZE
|
228
|
+
#undef BUCKET_SIZE
|
229
|
+
|
230
|
+
#undef HashLongestMatchQuickly
|
@@ -0,0 +1,95 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Build per-context histograms of literals, commands and distance codes. */
|
8
|
+
|
9
|
+
#include "./histogram.h"
|
10
|
+
|
11
|
+
#include "./block_splitter.h"
|
12
|
+
#include "./command.h"
|
13
|
+
#include "./context.h"
|
14
|
+
|
15
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
16
|
+
extern "C" {
|
17
|
+
#endif
|
18
|
+
|
19
|
+
typedef struct BlockSplitIterator {
|
20
|
+
const BlockSplit* split_; /* Not owned. */
|
21
|
+
size_t idx_;
|
22
|
+
size_t type_;
|
23
|
+
size_t length_;
|
24
|
+
} BlockSplitIterator;
|
25
|
+
|
26
|
+
static void InitBlockSplitIterator(BlockSplitIterator* self,
|
27
|
+
const BlockSplit* split) {
|
28
|
+
self->split_ = split;
|
29
|
+
self->idx_ = 0;
|
30
|
+
self->type_ = 0;
|
31
|
+
self->length_ = split->lengths ? split->lengths[0] : 0;
|
32
|
+
}
|
33
|
+
|
34
|
+
/* Consumes one symbol. When the current block is exhausted, first moves on
   to the next block and loads its type and length. No bounds check is made
   against the number of blocks in the split. */
static void BlockSplitIteratorNext(BlockSplitIterator* self) {
  if (self->length_ == 0) {
    const BlockSplit* const split = self->split_;
    const size_t next = self->idx_ + 1;
    self->idx_ = next;
    self->type_ = split->types[next];
    self->length_ = split->lengths[next];
  }
  self->length_ -= 1;
}
|
42
|
+
|
43
|
+
void BrotliBuildHistogramsWithContext(
|
44
|
+
const Command* cmds, const size_t num_commands,
|
45
|
+
const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
|
46
|
+
const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t start_pos,
|
47
|
+
size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
|
48
|
+
const ContextType* context_modes, HistogramLiteral* literal_histograms,
|
49
|
+
HistogramCommand* insert_and_copy_histograms,
|
50
|
+
HistogramDistance* copy_dist_histograms) {
|
51
|
+
size_t pos = start_pos;
|
52
|
+
BlockSplitIterator literal_it;
|
53
|
+
BlockSplitIterator insert_and_copy_it;
|
54
|
+
BlockSplitIterator dist_it;
|
55
|
+
size_t i;
|
56
|
+
|
57
|
+
InitBlockSplitIterator(&literal_it, literal_split);
|
58
|
+
InitBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split);
|
59
|
+
InitBlockSplitIterator(&dist_it, dist_split);
|
60
|
+
for (i = 0; i < num_commands; ++i) {
|
61
|
+
const Command* cmd = &cmds[i];
|
62
|
+
size_t j;
|
63
|
+
BlockSplitIteratorNext(&insert_and_copy_it);
|
64
|
+
HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_],
|
65
|
+
cmd->cmd_prefix_);
|
66
|
+
for (j = cmd->insert_len_; j != 0; --j) {
|
67
|
+
size_t context;
|
68
|
+
BlockSplitIteratorNext(&literal_it);
|
69
|
+
context = (literal_it.type_ << BROTLI_LITERAL_CONTEXT_BITS) +
|
70
|
+
Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
|
71
|
+
HistogramAddLiteral(&literal_histograms[context],
|
72
|
+
ringbuffer[pos & mask]);
|
73
|
+
prev_byte2 = prev_byte;
|
74
|
+
prev_byte = ringbuffer[pos & mask];
|
75
|
+
++pos;
|
76
|
+
}
|
77
|
+
pos += CommandCopyLen(cmd);
|
78
|
+
if (CommandCopyLen(cmd)) {
|
79
|
+
prev_byte2 = ringbuffer[(pos - 2) & mask];
|
80
|
+
prev_byte = ringbuffer[(pos - 1) & mask];
|
81
|
+
if (cmd->cmd_prefix_ >= 128) {
|
82
|
+
size_t context;
|
83
|
+
BlockSplitIteratorNext(&dist_it);
|
84
|
+
context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) +
|
85
|
+
CommandDistanceContext(cmd);
|
86
|
+
HistogramAddDistance(©_dist_histograms[context],
|
87
|
+
cmd->dist_prefix_);
|
88
|
+
}
|
89
|
+
}
|
90
|
+
}
|
91
|
+
}
|
92
|
+
|
93
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
94
|
+
} /* extern "C" */
|
95
|
+
#endif
|
@@ -4,91 +4,57 @@
|
|
4
4
|
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
5
|
*/
|
6
6
|
|
7
|
-
|
7
|
+
/* Models the histograms of literals, commands and distance codes. */
|
8
8
|
|
9
9
|
#ifndef BROTLI_ENC_HISTOGRAM_H_
|
10
10
|
#define BROTLI_ENC_HISTOGRAM_H_
|
11
11
|
|
12
|
-
#include <
|
13
|
-
#include <limits>
|
14
|
-
#include <vector>
|
15
|
-
#include "./context.h"
|
16
|
-
#include "./command.h"
|
17
|
-
#include "./fast_log.h"
|
18
|
-
#include "./prefix.h"
|
19
|
-
#include "./types.h"
|
20
|
-
|
21
|
-
namespace brotli {
|
22
|
-
|
23
|
-
struct BlockSplit;
|
24
|
-
|
25
|
-
// A simple container for histograms of data in blocks.
|
26
|
-
template<int kDataSize>
|
27
|
-
struct Histogram {
|
28
|
-
Histogram(void) {
|
29
|
-
Clear();
|
30
|
-
}
|
31
|
-
void Clear(void) {
|
32
|
-
memset(data_, 0, sizeof(data_));
|
33
|
-
total_count_ = 0;
|
34
|
-
bit_cost_ = std::numeric_limits<double>::infinity();
|
35
|
-
}
|
36
|
-
void Add(size_t val) {
|
37
|
-
++data_[val];
|
38
|
-
++total_count_;
|
39
|
-
}
|
40
|
-
void Remove(size_t val) {
|
41
|
-
--data_[val];
|
42
|
-
--total_count_;
|
43
|
-
}
|
44
|
-
template<typename DataType>
|
45
|
-
void Add(const DataType *p, size_t n) {
|
46
|
-
total_count_ += n;
|
47
|
-
n += 1;
|
48
|
-
while(--n) ++data_[*p++];
|
49
|
-
}
|
50
|
-
void AddHistogram(const Histogram& v) {
|
51
|
-
total_count_ += v.total_count_;
|
52
|
-
for (size_t i = 0; i < kDataSize; ++i) {
|
53
|
-
data_[i] += v.data_[i];
|
54
|
-
}
|
55
|
-
}
|
56
|
-
|
57
|
-
uint32_t data_[kDataSize];
|
58
|
-
size_t total_count_;
|
59
|
-
double bit_cost_;
|
60
|
-
};
|
12
|
+
#include <string.h> /* memset */
|
61
13
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
14
|
+
#include "../common/constants.h"
|
15
|
+
#include "../common/types.h"
|
16
|
+
#include "./block_splitter.h"
|
17
|
+
#include "./command.h"
|
18
|
+
#include "./context.h"
|
19
|
+
#include "./port.h"
|
20
|
+
|
21
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
22
|
+
extern "C" {
|
23
|
+
#endif
|
24
|
+
|
25
|
+
/* Instantiate the histogram "template" (histogram_inc.h) once per symbol
   alphabet. Every instantiation defines all three parameters (FN,
   DATA_SIZE, DataType) and undefines them afterwards, so no parameter leaks
   from one instantiation into the next. */

#define FN(X) X ## Literal
#define DATA_SIZE BROTLI_NUM_LITERAL_SYMBOLS
#define DataType uint8_t
#include "./histogram_inc.h"  /* NOLINT(build/include) */
#undef DataType
#undef DATA_SIZE
#undef FN

#define FN(X) X ## Command
#define DATA_SIZE BROTLI_NUM_COMMAND_SYMBOLS
#define DataType uint16_t
#include "./histogram_inc.h"  /* NOLINT(build/include) */
#undef DataType
#undef DATA_SIZE
#undef FN

#define FN(X) X ## Distance
#define DATA_SIZE BROTLI_NUM_DISTANCE_SYMBOLS
/* Same element type the distance instantiation previously inherited from
   the command instantiation. */
#define DataType uint16_t
#include "./histogram_inc.h"  /* NOLINT(build/include) */
#undef DataType
#undef DATA_SIZE
#undef FN
|
46
|
+
|
47
|
+
BROTLI_INTERNAL void BrotliBuildHistogramsWithContext(
|
48
|
+
const Command* cmds, const size_t num_commands,
|
49
|
+
const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
|
50
|
+
const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t pos,
|
51
|
+
size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
|
52
|
+
const ContextType* context_modes, HistogramLiteral* literal_histograms,
|
53
|
+
HistogramCommand* insert_and_copy_histograms,
|
54
|
+
HistogramDistance* copy_dist_histograms);
|
55
|
+
|
56
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
57
|
+
} /* extern "C" */
|
58
|
+
#endif
|
59
|
+
|
60
|
+
#endif /* BROTLI_ENC_HISTOGRAM_H_ */
|
@@ -0,0 +1,51 @@
|
|
1
|
+
/* NOLINT(build/header_guard) */
|
2
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
3
|
+
|
4
|
+
Distributed under MIT license.
|
5
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
6
|
+
*/
|
7
|
+
|
8
|
+
/* template parameters: FN, DATA_SIZE, DataType */
|
9
|
+
|
10
|
+
/* A simple container for histograms of data in blocks. */
|
11
|
+
|
12
|
+
typedef struct FN(Histogram) {
|
13
|
+
uint32_t data_[DATA_SIZE];
|
14
|
+
size_t total_count_;
|
15
|
+
double bit_cost_;
|
16
|
+
} FN(Histogram);
|
17
|
+
|
18
|
+
/* Resets |self| to the empty histogram: all counts zero, total zero and
   bit cost HUGE_VAL. */
static BROTLI_INLINE void FN(HistogramClear)(FN(Histogram)* self) {
  self->bit_cost_ = HUGE_VAL;
  self->total_count_ = 0;
  memset(self->data_, 0, sizeof(self->data_));
}
|
23
|
+
|
24
|
+
static BROTLI_INLINE void FN(ClearHistograms)(
|
25
|
+
FN(Histogram)* array, size_t length) {
|
26
|
+
size_t i;
|
27
|
+
for (i = 0; i < length; ++i) FN(HistogramClear)(array + i);
|
28
|
+
}
|
29
|
+
|
30
|
+
/* Counts one occurrence of symbol |val|. |val| is used as an index into
   data_ without a range check. */
static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {
  self->total_count_ += 1;
  self->data_[val] += 1;
}
|
34
|
+
|
35
|
+
static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self,
|
36
|
+
const DataType *p, size_t n) {
|
37
|
+
self->total_count_ += n;
|
38
|
+
n += 1;
|
39
|
+
while (--n) ++self->data_[*p++];
|
40
|
+
}
|
41
|
+
|
42
|
+
static BROTLI_INLINE void FN(HistogramAddHistogram)(FN(Histogram)* self,
|
43
|
+
const FN(Histogram)* v) {
|
44
|
+
size_t i;
|
45
|
+
self->total_count_ += v->total_count_;
|
46
|
+
for (i = 0; i < DATA_SIZE; ++i) {
|
47
|
+
self->data_[i] += v->data_[i];
|
48
|
+
}
|
49
|
+
}
|
50
|
+
|
51
|
+
/* Returns the number of symbols in this histogram's alphabet (DATA_SIZE). */
static BROTLI_INLINE size_t FN(HistogramDataSize)(void) {
  const size_t alphabet_size = DATA_SIZE;
  return alphabet_size;
}
|
@@ -0,0 +1,178 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Literal cost model to allow backward reference replacement to be efficient.
|
8
|
+
*/
|
9
|
+
|
10
|
+
#include "./literal_cost.h"
|
11
|
+
|
12
|
+
#include "../common/types.h"
|
13
|
+
#include "./fast_log.h"
|
14
|
+
#include "./port.h"
|
15
|
+
#include "./utf8_util.h"
|
16
|
+
|
17
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
18
|
+
extern "C" {
|
19
|
+
#endif
|
20
|
+
|
21
|
+
/* Classifies the position of the byte that FOLLOWS |c| inside a UTF-8
   sequence, given |c| and the byte |last| that preceded it:
     0 - next byte is a 'Byte 1' (start of a character),
     1 - next byte is the 'Byte 2' of a multi-byte character,
     2 - next byte is the 'Byte 3' of a multi-byte character.
   The result is never larger than |clamp|. */
static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
  size_t position;
  if (c < 128) {
    return 0;  /* ASCII byte: the next one starts a new character. */
  }
  if (c >= 192) {
    position = 1;  /* Lead byte: the next one is 'Byte 2'. */
  } else {
    /* Continuation byte: the byte before it decides whether the sequence
       is complete. */
    if (last < 0xe0) {
      return 0;  /* Completed two or three byte coding. */
    }
    position = 2;  /* The next one is 'Byte 3'. */
  }
  return position < clamp ? position : clamp;
}
|
35
|
+
|
36
|
+
/* Decides how many UTF-8 position classes deserve separate statistics for
   the |len| bytes starting at ring-buffer position |pos|.
   Returns 0 when fewer than 25 bytes fall into classes 1 and 2 combined,
   and 1 otherwise. */
static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
                                        const uint8_t *data) {
  size_t counts[3] = { 0 };
  size_t prev = 0;
  size_t offset;
  for (offset = 0; offset < len; ++offset) {
    const size_t cur = data[(pos + offset) & mask];
    ++counts[UTF8Position(prev, cur, 2)];
    prev = cur;
  }
  if (counts[1] + counts[2] < 25) {
    return 0;
  }
  /* Level 2 is never selected (it should be when counts[2] >= 500, but 1
     compresses better), so counts[2] alone does not affect the result. */
  return 1;
}
|
57
|
+
|
58
|
+
static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
59
|
+
const uint8_t *data, float *cost) {
|
60
|
+
/* max_utf8 is 0 (normal ascii single byte modeling),
|
61
|
+
1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling). */
|
62
|
+
const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
|
63
|
+
size_t histogram[3][256] = { { 0 } };
|
64
|
+
size_t window_half = 495;
|
65
|
+
size_t in_window = BROTLI_MIN(size_t, window_half, len);
|
66
|
+
size_t in_window_utf8[3] = { 0 };
|
67
|
+
|
68
|
+
|
69
|
+
size_t i;
|
70
|
+
{ /* Bootstrap histograms. */
|
71
|
+
size_t last_c = 0;
|
72
|
+
size_t utf8_pos = 0;
|
73
|
+
for (i = 0; i < in_window; ++i) {
|
74
|
+
size_t c = data[(pos + i) & mask];
|
75
|
+
++histogram[utf8_pos][c];
|
76
|
+
++in_window_utf8[utf8_pos];
|
77
|
+
utf8_pos = UTF8Position(last_c, c, max_utf8);
|
78
|
+
last_c = c;
|
79
|
+
}
|
80
|
+
}
|
81
|
+
|
82
|
+
/* Compute bit costs with sliding window. */
|
83
|
+
for (i = 0; i < len; ++i) {
|
84
|
+
if (i >= window_half) {
|
85
|
+
/* Remove a byte in the past. */
|
86
|
+
size_t c =
|
87
|
+
i < window_half + 1 ? 0 : data[(pos + i - window_half - 1) & mask];
|
88
|
+
size_t last_c =
|
89
|
+
i < window_half + 2 ? 0 : data[(pos + i - window_half - 2) & mask];
|
90
|
+
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
|
91
|
+
--histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
|
92
|
+
--in_window_utf8[utf8_pos2];
|
93
|
+
}
|
94
|
+
if (i + window_half < len) {
|
95
|
+
/* Add a byte in the future. */
|
96
|
+
size_t c = data[(pos + i + window_half - 1) & mask];
|
97
|
+
size_t last_c = data[(pos + i + window_half - 2) & mask];
|
98
|
+
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
|
99
|
+
++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
|
100
|
+
++in_window_utf8[utf8_pos2];
|
101
|
+
}
|
102
|
+
{
|
103
|
+
size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
|
104
|
+
size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
|
105
|
+
size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
|
106
|
+
size_t masked_pos = (pos + i) & mask;
|
107
|
+
size_t histo = histogram[utf8_pos][data[masked_pos]];
|
108
|
+
double lit_cost;
|
109
|
+
if (histo == 0) {
|
110
|
+
histo = 1;
|
111
|
+
}
|
112
|
+
lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
|
113
|
+
lit_cost += 0.02905;
|
114
|
+
if (lit_cost < 1.0) {
|
115
|
+
lit_cost *= 0.5;
|
116
|
+
lit_cost += 0.5;
|
117
|
+
}
|
118
|
+
/* Make the first bytes more expensive -- seems to help, not sure why.
|
119
|
+
Perhaps because the entropy source is changing its properties
|
120
|
+
rapidly in the beginning of the file, perhaps because the beginning
|
121
|
+
of the data is a statistical "anomaly". */
|
122
|
+
if (i < 2000) {
|
123
|
+
lit_cost += 0.7 - ((double)(2000 - i) / 2000.0 * 0.35);
|
124
|
+
}
|
125
|
+
cost[i] = (float)lit_cost;
|
126
|
+
}
|
127
|
+
}
|
128
|
+
}
|
129
|
+
|
130
|
+
void BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
131
|
+
const uint8_t *data, float *cost) {
|
132
|
+
if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
|
133
|
+
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
|
134
|
+
return;
|
135
|
+
} else {
|
136
|
+
size_t histogram[256] = { 0 };
|
137
|
+
size_t window_half = 2000;
|
138
|
+
size_t in_window = BROTLI_MIN(size_t, window_half, len);
|
139
|
+
|
140
|
+
/* Bootstrap histogram. */
|
141
|
+
size_t i;
|
142
|
+
for (i = 0; i < in_window; ++i) {
|
143
|
+
++histogram[data[(pos + i) & mask]];
|
144
|
+
}
|
145
|
+
|
146
|
+
/* Compute bit costs with sliding window. */
|
147
|
+
for (i = 0; i < len; ++i) {
|
148
|
+
size_t histo;
|
149
|
+
if (i >= window_half) {
|
150
|
+
/* Remove a byte in the past. */
|
151
|
+
--histogram[data[(pos + i - window_half) & mask]];
|
152
|
+
--in_window;
|
153
|
+
}
|
154
|
+
if (i + window_half < len) {
|
155
|
+
/* Add a byte in the future. */
|
156
|
+
++histogram[data[(pos + i + window_half) & mask]];
|
157
|
+
++in_window;
|
158
|
+
}
|
159
|
+
histo = histogram[data[(pos + i) & mask]];
|
160
|
+
if (histo == 0) {
|
161
|
+
histo = 1;
|
162
|
+
}
|
163
|
+
{
|
164
|
+
double lit_cost = FastLog2(in_window) - FastLog2(histo);
|
165
|
+
lit_cost += 0.029;
|
166
|
+
if (lit_cost < 1.0) {
|
167
|
+
lit_cost *= 0.5;
|
168
|
+
lit_cost += 0.5;
|
169
|
+
}
|
170
|
+
cost[i] = (float)lit_cost;
|
171
|
+
}
|
172
|
+
}
|
173
|
+
}
|
174
|
+
}
|
175
|
+
|
176
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
177
|
+
} /* extern "C" */
|
178
|
+
#endif
|