brotli 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +6 -3
- data/.github/workflows/publish.yml +7 -17
- data/.gitmodules +1 -1
- data/README.md +2 -2
- data/ext/brotli/brotli.c +8 -0
- data/ext/brotli/extconf.rb +6 -0
- data/lib/brotli/version.rb +1 -1
- data/test/brotli_test.rb +14 -1
- data/test/test_helper.rb +1 -0
- data/vendor/brotli/c/common/constants.c +1 -1
- data/vendor/brotli/c/common/constants.h +2 -1
- data/vendor/brotli/c/common/context.c +1 -1
- data/vendor/brotli/c/common/dictionary.c +5 -3
- data/vendor/brotli/c/common/platform.c +2 -1
- data/vendor/brotli/c/common/platform.h +60 -113
- data/vendor/brotli/c/common/shared_dictionary.c +521 -0
- data/vendor/brotli/c/common/shared_dictionary_internal.h +75 -0
- data/vendor/brotli/c/common/transform.c +1 -1
- data/vendor/brotli/c/common/version.h +31 -6
- data/vendor/brotli/c/dec/bit_reader.c +10 -8
- data/vendor/brotli/c/dec/bit_reader.h +172 -100
- data/vendor/brotli/c/dec/decode.c +467 -200
- data/vendor/brotli/c/dec/huffman.c +7 -4
- data/vendor/brotli/c/dec/huffman.h +2 -1
- data/vendor/brotli/c/dec/prefix.h +2 -1
- data/vendor/brotli/c/dec/state.c +33 -9
- data/vendor/brotli/c/dec/state.h +70 -35
- data/vendor/brotli/c/enc/backward_references.c +81 -19
- data/vendor/brotli/c/enc/backward_references.h +5 -4
- data/vendor/brotli/c/enc/backward_references_hq.c +148 -52
- data/vendor/brotli/c/enc/backward_references_hq.h +6 -5
- data/vendor/brotli/c/enc/backward_references_inc.h +31 -5
- data/vendor/brotli/c/enc/bit_cost.c +8 -7
- data/vendor/brotli/c/enc/bit_cost.h +5 -4
- data/vendor/brotli/c/enc/block_splitter.c +37 -14
- data/vendor/brotli/c/enc/block_splitter.h +5 -4
- data/vendor/brotli/c/enc/block_splitter_inc.h +86 -45
- data/vendor/brotli/c/enc/brotli_bit_stream.c +132 -110
- data/vendor/brotli/c/enc/brotli_bit_stream.h +11 -6
- data/vendor/brotli/c/enc/cluster.c +10 -9
- data/vendor/brotli/c/enc/cluster.h +7 -6
- data/vendor/brotli/c/enc/cluster_inc.h +25 -20
- data/vendor/brotli/c/enc/command.c +1 -1
- data/vendor/brotli/c/enc/command.h +5 -4
- data/vendor/brotli/c/enc/compound_dictionary.c +207 -0
- data/vendor/brotli/c/enc/compound_dictionary.h +74 -0
- data/vendor/brotli/c/enc/compress_fragment.c +93 -83
- data/vendor/brotli/c/enc/compress_fragment.h +32 -7
- data/vendor/brotli/c/enc/compress_fragment_two_pass.c +99 -87
- data/vendor/brotli/c/enc/compress_fragment_two_pass.h +21 -3
- data/vendor/brotli/c/enc/dictionary_hash.c +3 -1
- data/vendor/brotli/c/enc/encode.c +473 -404
- data/vendor/brotli/c/enc/encoder_dict.c +611 -4
- data/vendor/brotli/c/enc/encoder_dict.h +117 -3
- data/vendor/brotli/c/enc/entropy_encode.c +3 -2
- data/vendor/brotli/c/enc/entropy_encode.h +2 -1
- data/vendor/brotli/c/enc/entropy_encode_static.h +5 -2
- data/vendor/brotli/c/enc/fast_log.c +1 -1
- data/vendor/brotli/c/enc/fast_log.h +2 -1
- data/vendor/brotli/c/enc/find_match_length.h +15 -22
- data/vendor/brotli/c/enc/hash.h +285 -45
- data/vendor/brotli/c/enc/hash_composite_inc.h +26 -11
- data/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +20 -18
- data/vendor/brotli/c/enc/hash_longest_match64_inc.h +34 -39
- data/vendor/brotli/c/enc/hash_longest_match_inc.h +6 -10
- data/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +4 -4
- data/vendor/brotli/c/enc/hash_rolling_inc.h +4 -4
- data/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +6 -5
- data/vendor/brotli/c/enc/histogram.c +4 -4
- data/vendor/brotli/c/enc/histogram.h +7 -6
- data/vendor/brotli/c/enc/literal_cost.c +20 -15
- data/vendor/brotli/c/enc/literal_cost.h +4 -2
- data/vendor/brotli/c/enc/memory.c +29 -5
- data/vendor/brotli/c/enc/memory.h +19 -2
- data/vendor/brotli/c/enc/metablock.c +72 -58
- data/vendor/brotli/c/enc/metablock.h +9 -8
- data/vendor/brotli/c/enc/metablock_inc.h +8 -6
- data/vendor/brotli/c/enc/params.h +4 -3
- data/vendor/brotli/c/enc/prefix.h +3 -2
- data/vendor/brotli/c/enc/quality.h +40 -3
- data/vendor/brotli/c/enc/ringbuffer.h +4 -3
- data/vendor/brotli/c/enc/state.h +104 -0
- data/vendor/brotli/c/enc/static_dict.c +60 -4
- data/vendor/brotli/c/enc/static_dict.h +3 -2
- data/vendor/brotli/c/enc/static_dict_lut.h +2 -0
- data/vendor/brotli/c/enc/utf8_util.c +1 -1
- data/vendor/brotli/c/enc/utf8_util.h +2 -1
- data/vendor/brotli/c/enc/write_bits.h +2 -1
- data/vendor/brotli/c/include/brotli/decode.h +67 -2
- data/vendor/brotli/c/include/brotli/encode.h +55 -2
- data/vendor/brotli/c/include/brotli/port.h +28 -11
- data/vendor/brotli/c/include/brotli/shared_dictionary.h +100 -0
- metadata +9 -3
@@ -46,17 +46,17 @@ static void FN(RandomSample)(uint32_t* seed,
|
|
46
46
|
static void FN(RefineEntropyCodes)(const DataType* data, size_t length,
|
47
47
|
size_t stride,
|
48
48
|
size_t num_histograms,
|
49
|
-
HistogramType* histograms
|
49
|
+
HistogramType* histograms,
|
50
|
+
HistogramType* tmp) {
|
50
51
|
size_t iters =
|
51
52
|
kIterMulForRefining * length / stride + kMinItersForRefining;
|
52
53
|
uint32_t seed = 7;
|
53
54
|
size_t iter;
|
54
55
|
iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
|
55
56
|
for (iter = 0; iter < iters; ++iter) {
|
56
|
-
|
57
|
-
FN(
|
58
|
-
FN(
|
59
|
-
FN(HistogramAddHistogram)(&histograms[iter % num_histograms], &sample);
|
57
|
+
FN(HistogramClear)(tmp);
|
58
|
+
FN(RandomSample)(&seed, data, length, stride, tmp);
|
59
|
+
FN(HistogramAddHistogram)(&histograms[iter % num_histograms], tmp);
|
60
60
|
}
|
61
61
|
}
|
62
62
|
|
@@ -71,46 +71,56 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
|
|
71
71
|
double* cost,
|
72
72
|
uint8_t* switch_signal,
|
73
73
|
uint8_t* block_id) {
|
74
|
-
const size_t
|
75
|
-
const size_t
|
74
|
+
const size_t alphabet_size = FN(HistogramDataSize)();
|
75
|
+
const size_t bitmap_len = (num_histograms + 7) >> 3;
|
76
76
|
size_t num_blocks = 1;
|
77
|
+
size_t byte_ix;
|
77
78
|
size_t i;
|
78
79
|
size_t j;
|
79
80
|
BROTLI_DCHECK(num_histograms <= 256);
|
81
|
+
|
82
|
+
/* Trivial case: single histogram -> single block type. */
|
80
83
|
if (num_histograms <= 1) {
|
81
84
|
for (i = 0; i < length; ++i) {
|
82
85
|
block_id[i] = 0;
|
83
86
|
}
|
84
87
|
return 1;
|
85
88
|
}
|
86
|
-
|
89
|
+
|
90
|
+
/* Fill bitcost for each symbol of all histograms.
|
91
|
+
* Non-existing symbol cost: 2 + log2(total_count).
|
92
|
+
* Regular symbol cost: -log2(symbol_count / total_count). */
|
93
|
+
memset(insert_cost, 0,
|
94
|
+
sizeof(insert_cost[0]) * alphabet_size * num_histograms);
|
87
95
|
for (i = 0; i < num_histograms; ++i) {
|
88
96
|
insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_);
|
89
97
|
}
|
90
|
-
for (i =
|
98
|
+
for (i = alphabet_size; i != 0;) {
|
99
|
+
/* Reverse order to use the 0-th row as a temporary storage. */
|
91
100
|
--i;
|
92
101
|
for (j = 0; j < num_histograms; ++j) {
|
93
102
|
insert_cost[i * num_histograms + j] =
|
94
103
|
insert_cost[j] - BitCost(histograms[j].data_[i]);
|
95
104
|
}
|
96
105
|
}
|
97
|
-
|
98
|
-
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
|
106
|
+
|
99
107
|
/* After each iteration of this loop, cost[k] will contain the difference
|
100
108
|
between the minimum cost of arriving at the current byte position using
|
101
109
|
entropy code k, and the minimum cost of arriving at the current byte
|
102
110
|
position. This difference is capped at the block switch cost, and if it
|
103
111
|
reaches block switch cost, it means that when we trace back from the last
|
104
112
|
position, we need to switch here. */
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
size_t
|
113
|
+
memset(cost, 0, sizeof(cost[0]) * num_histograms);
|
114
|
+
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmap_len);
|
115
|
+
for (byte_ix = 0; byte_ix < length; ++byte_ix) {
|
116
|
+
size_t ix = byte_ix * bitmap_len;
|
117
|
+
size_t symbol = data[byte_ix];
|
118
|
+
size_t insert_cost_ix = symbol * num_histograms;
|
109
119
|
double min_cost = 1e99;
|
110
120
|
double block_switch_cost = block_switch_bitcost;
|
111
121
|
size_t k;
|
112
122
|
for (k = 0; k < num_histograms; ++k) {
|
113
|
-
/* We are coding the symbol
|
123
|
+
/* We are coding the symbol with entropy code k. */
|
114
124
|
cost[k] += insert_cost[insert_cost_ix + k];
|
115
125
|
if (cost[k] < min_cost) {
|
116
126
|
min_cost = cost[k];
|
@@ -126,20 +136,21 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
|
|
126
136
|
if (cost[k] >= block_switch_cost) {
|
127
137
|
const uint8_t mask = (uint8_t)(1u << (k & 7));
|
128
138
|
cost[k] = block_switch_cost;
|
129
|
-
BROTLI_DCHECK((k >> 3) <
|
139
|
+
BROTLI_DCHECK((k >> 3) < bitmap_len);
|
130
140
|
switch_signal[ix + (k >> 3)] |= mask;
|
131
141
|
}
|
132
142
|
}
|
133
143
|
}
|
144
|
+
|
145
|
+
byte_ix = length - 1;
|
134
146
|
{ /* Trace back from the last position and switch at the marked places. */
|
135
|
-
size_t
|
136
|
-
size_t ix = byte_ix * bitmaplen;
|
147
|
+
size_t ix = byte_ix * bitmap_len;
|
137
148
|
uint8_t cur_id = block_id[byte_ix];
|
138
149
|
while (byte_ix > 0) {
|
139
150
|
const uint8_t mask = (uint8_t)(1u << (cur_id & 7));
|
140
|
-
BROTLI_DCHECK(((size_t)cur_id >> 3) <
|
151
|
+
BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmap_len);
|
141
152
|
--byte_ix;
|
142
|
-
ix -=
|
153
|
+
ix -= bitmap_len;
|
143
154
|
if (switch_signal[ix + (cur_id >> 3)] & mask) {
|
144
155
|
if (cur_id != block_id[byte_ix]) {
|
145
156
|
cur_id = block_id[byte_ix];
|
@@ -185,13 +196,16 @@ static void FN(BuildBlockHistograms)(const DataType* data, const size_t length,
|
|
185
196
|
}
|
186
197
|
}
|
187
198
|
|
199
|
+
/* Given the initial partitioning build partitioning with limited number
|
200
|
+
* of histograms (and block types). */
|
188
201
|
static void FN(ClusterBlocks)(MemoryManager* m,
|
189
202
|
const DataType* data, const size_t length,
|
190
203
|
const size_t num_blocks,
|
191
204
|
uint8_t* block_ids,
|
192
205
|
BlockSplit* split) {
|
193
206
|
uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks);
|
194
|
-
uint32_t*
|
207
|
+
uint32_t* u32 =
|
208
|
+
BROTLI_ALLOC(m, uint32_t, num_blocks + 4 * HISTOGRAMS_PER_BATCH);
|
195
209
|
const size_t expected_num_clusters = CLUSTERS_PER_BATCH *
|
196
210
|
(num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH;
|
197
211
|
size_t all_histograms_size = 0;
|
@@ -214,20 +228,25 @@ static void FN(ClusterBlocks)(MemoryManager* m,
|
|
214
228
|
static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
|
215
229
|
uint32_t* new_index;
|
216
230
|
size_t i;
|
217
|
-
uint32_t sizes
|
218
|
-
uint32_t new_clusters
|
219
|
-
uint32_t symbols
|
220
|
-
uint32_t remap
|
231
|
+
uint32_t* BROTLI_RESTRICT const sizes = u32 + 0 * HISTOGRAMS_PER_BATCH;
|
232
|
+
uint32_t* BROTLI_RESTRICT const new_clusters = u32 + 1 * HISTOGRAMS_PER_BATCH;
|
233
|
+
uint32_t* BROTLI_RESTRICT const symbols = u32 + 2 * HISTOGRAMS_PER_BATCH;
|
234
|
+
uint32_t* BROTLI_RESTRICT const remap = u32 + 3 * HISTOGRAMS_PER_BATCH;
|
235
|
+
uint32_t* BROTLI_RESTRICT const block_lengths =
|
236
|
+
u32 + 4 * HISTOGRAMS_PER_BATCH;
|
237
|
+
/* TODO(eustas): move to arena? */
|
238
|
+
HistogramType* tmp = BROTLI_ALLOC(m, HistogramType, 2);
|
221
239
|
|
222
240
|
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histogram_symbols) ||
|
223
|
-
BROTLI_IS_NULL(
|
241
|
+
BROTLI_IS_NULL(u32) || BROTLI_IS_NULL(all_histograms) ||
|
224
242
|
BROTLI_IS_NULL(cluster_size) || BROTLI_IS_NULL(histograms) ||
|
225
|
-
BROTLI_IS_NULL(pairs)) {
|
243
|
+
BROTLI_IS_NULL(pairs) || BROTLI_IS_NULL(tmp)) {
|
226
244
|
return;
|
227
245
|
}
|
228
246
|
|
229
|
-
memset(
|
247
|
+
memset(u32, 0, (num_blocks + 4 * HISTOGRAMS_PER_BATCH) * sizeof(uint32_t));
|
230
248
|
|
249
|
+
/* Calculate block lengths (convert repeating values -> series length). */
|
231
250
|
{
|
232
251
|
size_t block_idx = 0;
|
233
252
|
for (i = 0; i < length; ++i) {
|
@@ -240,6 +259,7 @@ static void FN(ClusterBlocks)(MemoryManager* m,
|
|
240
259
|
BROTLI_DCHECK(block_idx == num_blocks);
|
241
260
|
}
|
242
261
|
|
262
|
+
/* Pre-cluster blocks (cluster batches). */
|
243
263
|
for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) {
|
244
264
|
const size_t num_to_combine =
|
245
265
|
BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH);
|
@@ -247,8 +267,9 @@ static void FN(ClusterBlocks)(MemoryManager* m,
|
|
247
267
|
size_t j;
|
248
268
|
for (j = 0; j < num_to_combine; ++j) {
|
249
269
|
size_t k;
|
270
|
+
size_t block_length = block_lengths[i + j];
|
250
271
|
FN(HistogramClear)(&histograms[j]);
|
251
|
-
for (k = 0; k <
|
272
|
+
for (k = 0; k < block_length; ++k) {
|
252
273
|
FN(HistogramAdd)(&histograms[j], data[pos++]);
|
253
274
|
}
|
254
275
|
histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]);
|
@@ -257,7 +278,7 @@ static void FN(ClusterBlocks)(MemoryManager* m,
|
|
257
278
|
sizes[j] = 1;
|
258
279
|
}
|
259
280
|
num_new_clusters = FN(BrotliHistogramCombine)(
|
260
|
-
histograms, sizes, symbols, new_clusters, pairs, num_to_combine,
|
281
|
+
histograms, tmp, sizes, symbols, new_clusters, pairs, num_to_combine,
|
261
282
|
num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs);
|
262
283
|
BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms,
|
263
284
|
all_histograms_capacity, all_histograms_size + num_new_clusters);
|
@@ -278,6 +299,7 @@ static void FN(ClusterBlocks)(MemoryManager* m,
|
|
278
299
|
}
|
279
300
|
BROTLI_FREE(m, histograms);
|
280
301
|
|
302
|
+
/* Final clustering. */
|
281
303
|
max_num_pairs =
|
282
304
|
BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters);
|
283
305
|
if (pairs_capacity < max_num_pairs + 1) {
|
@@ -285,19 +307,19 @@ static void FN(ClusterBlocks)(MemoryManager* m,
|
|
285
307
|
pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1);
|
286
308
|
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(pairs)) return;
|
287
309
|
}
|
288
|
-
|
289
310
|
clusters = BROTLI_ALLOC(m, uint32_t, num_clusters);
|
290
311
|
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(clusters)) return;
|
291
312
|
for (i = 0; i < num_clusters; ++i) {
|
292
313
|
clusters[i] = (uint32_t)i;
|
293
314
|
}
|
294
315
|
num_final_clusters = FN(BrotliHistogramCombine)(
|
295
|
-
all_histograms, cluster_size, histogram_symbols, clusters, pairs,
|
316
|
+
all_histograms, tmp, cluster_size, histogram_symbols, clusters, pairs,
|
296
317
|
num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES,
|
297
318
|
max_num_pairs);
|
298
319
|
BROTLI_FREE(m, pairs);
|
299
320
|
BROTLI_FREE(m, cluster_size);
|
300
321
|
|
322
|
+
/* Assign blocks to final histograms. */
|
301
323
|
new_index = BROTLI_ALLOC(m, uint32_t, num_clusters);
|
302
324
|
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return;
|
303
325
|
for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex;
|
@@ -305,20 +327,21 @@ static void FN(ClusterBlocks)(MemoryManager* m,
|
|
305
327
|
{
|
306
328
|
uint32_t next_index = 0;
|
307
329
|
for (i = 0; i < num_blocks; ++i) {
|
308
|
-
HistogramType histo;
|
309
330
|
size_t j;
|
310
331
|
uint32_t best_out;
|
311
332
|
double best_bits;
|
312
|
-
FN(HistogramClear)(
|
333
|
+
FN(HistogramClear)(tmp);
|
313
334
|
for (j = 0; j < block_lengths[i]; ++j) {
|
314
|
-
FN(HistogramAdd)(
|
335
|
+
FN(HistogramAdd)(tmp, data[pos++]);
|
315
336
|
}
|
337
|
+
/* Among equally good histograms prefer last used. */
|
338
|
+
/* TODO(eustas): should we give a block-switch discount here? */
|
316
339
|
best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1];
|
317
|
-
best_bits =
|
318
|
-
|
340
|
+
best_bits = FN(BrotliHistogramBitCostDistance)(
|
341
|
+
tmp, &all_histograms[best_out], tmp + 1);
|
319
342
|
for (j = 0; j < num_final_clusters; ++j) {
|
320
343
|
const double cur_bits = FN(BrotliHistogramBitCostDistance)(
|
321
|
-
|
344
|
+
tmp, &all_histograms[clusters[j]], tmp + 1);
|
322
345
|
if (cur_bits < best_bits) {
|
323
346
|
best_bits = cur_bits;
|
324
347
|
best_out = clusters[j];
|
@@ -330,6 +353,7 @@ static void FN(ClusterBlocks)(MemoryManager* m,
|
|
330
353
|
}
|
331
354
|
}
|
332
355
|
}
|
356
|
+
BROTLI_FREE(m, tmp);
|
333
357
|
BROTLI_FREE(m, clusters);
|
334
358
|
BROTLI_FREE(m, all_histograms);
|
335
359
|
BROTLI_ENSURE_CAPACITY(
|
@@ -337,6 +361,9 @@ static void FN(ClusterBlocks)(MemoryManager* m,
|
|
337
361
|
BROTLI_ENSURE_CAPACITY(
|
338
362
|
m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks);
|
339
363
|
if (BROTLI_IS_OOM(m)) return;
|
364
|
+
|
365
|
+
/* Rewrite final assignment to block-split. There might be fewer blocks
|
366
|
+
* than |num_blocks| due to clustering. */
|
340
367
|
{
|
341
368
|
uint32_t cur_length = 0;
|
342
369
|
size_t block_idx = 0;
|
@@ -357,28 +384,41 @@ static void FN(ClusterBlocks)(MemoryManager* m,
|
|
357
384
|
split->num_types = (size_t)max_type + 1;
|
358
385
|
}
|
359
386
|
BROTLI_FREE(m, new_index);
|
360
|
-
BROTLI_FREE(m,
|
387
|
+
BROTLI_FREE(m, u32);
|
361
388
|
BROTLI_FREE(m, histogram_symbols);
|
362
389
|
}
|
363
390
|
|
391
|
+
/* Create BlockSplit (partitioning) given the limits, estimates and "effort"
|
392
|
+
* parameters.
|
393
|
+
*
|
394
|
+
* NB: max_histograms is often less than number of histograms allowed by format;
|
395
|
+
* this is done intentionally, to save some "space" for context-aware
|
396
|
+
* clustering (here entropy is estimated for context-free symbols). */
|
364
397
|
static void FN(SplitByteVector)(MemoryManager* m,
|
365
398
|
const DataType* data, const size_t length,
|
366
|
-
const size_t
|
399
|
+
const size_t symbols_per_histogram,
|
367
400
|
const size_t max_histograms,
|
368
401
|
const size_t sampling_stride_length,
|
369
402
|
const double block_switch_cost,
|
370
403
|
const BrotliEncoderParams* params,
|
371
404
|
BlockSplit* split) {
|
372
405
|
const size_t data_size = FN(HistogramDataSize)();
|
373
|
-
size_t num_histograms = length / literals_per_histogram + 1;
|
374
406
|
HistogramType* histograms;
|
407
|
+
HistogramType* tmp;
|
408
|
+
/* Calculate number of histograms; initial estimate is one histogram per
|
409
|
+
* specified amount of symbols; however, this value is capped. */
|
410
|
+
size_t num_histograms = length / symbols_per_histogram + 1;
|
375
411
|
if (num_histograms > max_histograms) {
|
376
412
|
num_histograms = max_histograms;
|
377
413
|
}
|
414
|
+
|
415
|
+
/* Corner case: no input. */
|
378
416
|
if (length == 0) {
|
379
417
|
split->num_types = 1;
|
380
418
|
return;
|
381
|
-
}
|
419
|
+
}
|
420
|
+
|
421
|
+
if (length < kMinLengthForBlockSplitting) {
|
382
422
|
BROTLI_ENSURE_CAPACITY(m, uint8_t,
|
383
423
|
split->types, split->types_alloc_size, split->num_blocks + 1);
|
384
424
|
BROTLI_ENSURE_CAPACITY(m, uint32_t,
|
@@ -390,7 +430,8 @@ static void FN(SplitByteVector)(MemoryManager* m,
|
|
390
430
|
split->num_blocks++;
|
391
431
|
return;
|
392
432
|
}
|
393
|
-
histograms = BROTLI_ALLOC(m, HistogramType, num_histograms);
|
433
|
+
histograms = BROTLI_ALLOC(m, HistogramType, num_histograms + 1);
|
434
|
+
tmp = histograms + num_histograms;
|
394
435
|
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histograms)) return;
|
395
436
|
/* Find good entropy codes. */
|
396
437
|
FN(InitialEntropyCodes)(data, length,
|
@@ -398,7 +439,7 @@ static void FN(SplitByteVector)(MemoryManager* m,
|
|
398
439
|
num_histograms, histograms);
|
399
440
|
FN(RefineEntropyCodes)(data, length,
|
400
441
|
sampling_stride_length,
|
401
|
-
num_histograms, histograms);
|
442
|
+
num_histograms, histograms, tmp);
|
402
443
|
{
|
403
444
|
/* Find a good path through literals with the good entropy codes. */
|
404
445
|
uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length);
|