brotli 0.2.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +37 -0
- data/.github/workflows/publish.yml +24 -0
- data/.gitmodules +1 -1
- data/Gemfile +6 -3
- data/README.md +2 -2
- data/Rakefile +16 -9
- data/brotli.gemspec +7 -13
- data/ext/brotli/brotli.c +210 -31
- data/ext/brotli/buffer.c +1 -7
- data/ext/brotli/buffer.h +1 -1
- data/ext/brotli/extconf.rb +25 -17
- data/lib/brotli/version.rb +1 -1
- data/test/brotli_test.rb +107 -0
- data/test/brotli_writer_test.rb +36 -0
- data/test/test_helper.rb +8 -0
- data/vendor/brotli/c/common/constants.c +15 -0
- data/vendor/brotli/c/common/constants.h +137 -0
- data/vendor/brotli/c/common/context.c +156 -0
- data/vendor/brotli/c/common/context.h +4 -152
- data/vendor/brotli/c/common/dictionary.bin.br +0 -0
- data/vendor/brotli/c/common/dictionary.c +14 -3
- data/vendor/brotli/c/common/platform.c +23 -0
- data/vendor/brotli/c/common/platform.h +95 -122
- data/vendor/brotli/c/common/shared_dictionary.c +521 -0
- data/vendor/brotli/c/common/shared_dictionary_internal.h +75 -0
- data/vendor/brotli/c/common/transform.c +60 -4
- data/vendor/brotli/c/common/transform.h +5 -0
- data/vendor/brotli/c/common/version.h +31 -6
- data/vendor/brotli/c/dec/bit_reader.c +34 -4
- data/vendor/brotli/c/dec/bit_reader.h +221 -107
- data/vendor/brotli/c/dec/decode.c +772 -403
- data/vendor/brotli/c/dec/huffman.c +7 -4
- data/vendor/brotli/c/dec/huffman.h +8 -13
- data/vendor/brotli/c/dec/prefix.h +1 -18
- data/vendor/brotli/c/dec/state.c +40 -21
- data/vendor/brotli/c/dec/state.h +201 -59
- data/vendor/brotli/c/enc/backward_references.c +88 -25
- data/vendor/brotli/c/enc/backward_references.h +10 -8
- data/vendor/brotli/c/enc/backward_references_hq.c +194 -80
- data/vendor/brotli/c/enc/backward_references_hq.h +17 -13
- data/vendor/brotli/c/enc/backward_references_inc.h +52 -16
- data/vendor/brotli/c/enc/bit_cost.c +8 -7
- data/vendor/brotli/c/enc/bit_cost.h +5 -4
- data/vendor/brotli/c/enc/block_splitter.c +40 -17
- data/vendor/brotli/c/enc/block_splitter.h +5 -4
- data/vendor/brotli/c/enc/block_splitter_inc.h +99 -49
- data/vendor/brotli/c/enc/brotli_bit_stream.c +142 -137
- data/vendor/brotli/c/enc/brotli_bit_stream.h +11 -6
- data/vendor/brotli/c/enc/cluster.c +10 -9
- data/vendor/brotli/c/enc/cluster.h +7 -6
- data/vendor/brotli/c/enc/cluster_inc.h +30 -22
- data/vendor/brotli/c/enc/command.c +28 -0
- data/vendor/brotli/c/enc/command.h +17 -16
- data/vendor/brotli/c/enc/compound_dictionary.c +207 -0
- data/vendor/brotli/c/enc/compound_dictionary.h +74 -0
- data/vendor/brotli/c/enc/compress_fragment.c +93 -83
- data/vendor/brotli/c/enc/compress_fragment.h +32 -7
- data/vendor/brotli/c/enc/compress_fragment_two_pass.c +100 -88
- data/vendor/brotli/c/enc/compress_fragment_two_pass.h +21 -3
- data/vendor/brotli/c/enc/dictionary_hash.c +1829 -1101
- data/vendor/brotli/c/enc/dictionary_hash.h +2 -1
- data/vendor/brotli/c/enc/encode.c +550 -416
- data/vendor/brotli/c/enc/encoder_dict.c +613 -5
- data/vendor/brotli/c/enc/encoder_dict.h +120 -4
- data/vendor/brotli/c/enc/entropy_encode.c +5 -2
- data/vendor/brotli/c/enc/entropy_encode.h +4 -3
- data/vendor/brotli/c/enc/entropy_encode_static.h +5 -2
- data/vendor/brotli/c/enc/fast_log.c +105 -0
- data/vendor/brotli/c/enc/fast_log.h +21 -101
- data/vendor/brotli/c/enc/find_match_length.h +17 -25
- data/vendor/brotli/c/enc/hash.h +350 -120
- data/vendor/brotli/c/enc/hash_composite_inc.h +71 -67
- data/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +92 -51
- data/vendor/brotli/c/enc/hash_longest_match64_inc.h +79 -84
- data/vendor/brotli/c/enc/hash_longest_match_inc.h +53 -54
- data/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +93 -62
- data/vendor/brotli/c/enc/hash_rolling_inc.h +25 -29
- data/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +42 -40
- data/vendor/brotli/c/enc/histogram.c +4 -4
- data/vendor/brotli/c/enc/histogram.h +7 -6
- data/vendor/brotli/c/enc/literal_cost.c +20 -15
- data/vendor/brotli/c/enc/literal_cost.h +4 -2
- data/vendor/brotli/c/enc/memory.c +29 -5
- data/vendor/brotli/c/enc/memory.h +43 -14
- data/vendor/brotli/c/enc/metablock.c +95 -85
- data/vendor/brotli/c/enc/metablock.h +9 -8
- data/vendor/brotli/c/enc/metablock_inc.h +9 -7
- data/vendor/brotli/c/enc/params.h +7 -4
- data/vendor/brotli/c/enc/prefix.h +3 -2
- data/vendor/brotli/c/enc/quality.h +40 -3
- data/vendor/brotli/c/enc/ringbuffer.h +8 -4
- data/vendor/brotli/c/enc/state.h +104 -0
- data/vendor/brotli/c/enc/static_dict.c +60 -4
- data/vendor/brotli/c/enc/static_dict.h +3 -2
- data/vendor/brotli/c/enc/static_dict_lut.h +2 -0
- data/vendor/brotli/c/enc/utf8_util.c +2 -2
- data/vendor/brotli/c/enc/utf8_util.h +2 -1
- data/vendor/brotli/c/enc/write_bits.h +29 -26
- data/vendor/brotli/c/include/brotli/decode.h +67 -2
- data/vendor/brotli/c/include/brotli/encode.h +77 -3
- data/vendor/brotli/c/include/brotli/port.h +34 -3
- data/vendor/brotli/c/include/brotli/shared_dictionary.h +100 -0
- metadata +23 -97
- data/.travis.yml +0 -31
- data/docs/Brotli/Error.html +0 -124
- data/docs/Brotli.html +0 -485
- data/docs/_index.html +0 -122
- data/docs/class_list.html +0 -51
- data/docs/css/common.css +0 -1
- data/docs/css/full_list.css +0 -58
- data/docs/css/style.css +0 -496
- data/docs/file.README.html +0 -127
- data/docs/file_list.html +0 -56
- data/docs/frames.html +0 -17
- data/docs/index.html +0 -127
- data/docs/js/app.js +0 -292
- data/docs/js/full_list.js +0 -216
- data/docs/js/jquery.js +0 -4
- data/docs/method_list.html +0 -67
- data/docs/top-level-namespace.html +0 -110
- data/spec/brotli_spec.rb +0 -88
- data/spec/inflate_spec.rb +0 -75
- data/spec/spec_helper.rb +0 -4
@@ -46,17 +46,17 @@ static void FN(RandomSample)(uint32_t* seed,
|
|
46
46
|
static void FN(RefineEntropyCodes)(const DataType* data, size_t length,
|
47
47
|
size_t stride,
|
48
48
|
size_t num_histograms,
|
49
|
-
HistogramType* histograms
|
49
|
+
HistogramType* histograms,
|
50
|
+
HistogramType* tmp) {
|
50
51
|
size_t iters =
|
51
52
|
kIterMulForRefining * length / stride + kMinItersForRefining;
|
52
53
|
uint32_t seed = 7;
|
53
54
|
size_t iter;
|
54
55
|
iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
|
55
56
|
for (iter = 0; iter < iters; ++iter) {
|
56
|
-
|
57
|
-
FN(
|
58
|
-
FN(
|
59
|
-
FN(HistogramAddHistogram)(&histograms[iter % num_histograms], &sample);
|
57
|
+
FN(HistogramClear)(tmp);
|
58
|
+
FN(RandomSample)(&seed, data, length, stride, tmp);
|
59
|
+
FN(HistogramAddHistogram)(&histograms[iter % num_histograms], tmp);
|
60
60
|
}
|
61
61
|
}
|
62
62
|
|
@@ -71,46 +71,56 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
|
|
71
71
|
double* cost,
|
72
72
|
uint8_t* switch_signal,
|
73
73
|
uint8_t* block_id) {
|
74
|
-
const size_t
|
75
|
-
const size_t
|
74
|
+
const size_t alphabet_size = FN(HistogramDataSize)();
|
75
|
+
const size_t bitmap_len = (num_histograms + 7) >> 3;
|
76
76
|
size_t num_blocks = 1;
|
77
|
+
size_t byte_ix;
|
77
78
|
size_t i;
|
78
79
|
size_t j;
|
79
80
|
BROTLI_DCHECK(num_histograms <= 256);
|
81
|
+
|
82
|
+
/* Trivial case: single histogram -> single block type. */
|
80
83
|
if (num_histograms <= 1) {
|
81
84
|
for (i = 0; i < length; ++i) {
|
82
85
|
block_id[i] = 0;
|
83
86
|
}
|
84
87
|
return 1;
|
85
88
|
}
|
86
|
-
|
89
|
+
|
90
|
+
/* Fill bitcost for each symbol of all histograms.
|
91
|
+
* Non-existing symbol cost: 2 + log2(total_count).
|
92
|
+
* Regular symbol cost: -log2(symbol_count / total_count). */
|
93
|
+
memset(insert_cost, 0,
|
94
|
+
sizeof(insert_cost[0]) * alphabet_size * num_histograms);
|
87
95
|
for (i = 0; i < num_histograms; ++i) {
|
88
96
|
insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_);
|
89
97
|
}
|
90
|
-
for (i =
|
98
|
+
for (i = alphabet_size; i != 0;) {
|
99
|
+
/* Reverse order to use the 0-th row as a temporary storage. */
|
91
100
|
--i;
|
92
101
|
for (j = 0; j < num_histograms; ++j) {
|
93
102
|
insert_cost[i * num_histograms + j] =
|
94
103
|
insert_cost[j] - BitCost(histograms[j].data_[i]);
|
95
104
|
}
|
96
105
|
}
|
97
|
-
|
98
|
-
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
|
106
|
+
|
99
107
|
/* After each iteration of this loop, cost[k] will contain the difference
|
100
108
|
between the minimum cost of arriving at the current byte position using
|
101
109
|
entropy code k, and the minimum cost of arriving at the current byte
|
102
110
|
position. This difference is capped at the block switch cost, and if it
|
103
111
|
reaches block switch cost, it means that when we trace back from the last
|
104
112
|
position, we need to switch here. */
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
size_t
|
113
|
+
memset(cost, 0, sizeof(cost[0]) * num_histograms);
|
114
|
+
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmap_len);
|
115
|
+
for (byte_ix = 0; byte_ix < length; ++byte_ix) {
|
116
|
+
size_t ix = byte_ix * bitmap_len;
|
117
|
+
size_t symbol = data[byte_ix];
|
118
|
+
size_t insert_cost_ix = symbol * num_histograms;
|
109
119
|
double min_cost = 1e99;
|
110
120
|
double block_switch_cost = block_switch_bitcost;
|
111
121
|
size_t k;
|
112
122
|
for (k = 0; k < num_histograms; ++k) {
|
113
|
-
/* We are coding the symbol
|
123
|
+
/* We are coding the symbol with entropy code k. */
|
114
124
|
cost[k] += insert_cost[insert_cost_ix + k];
|
115
125
|
if (cost[k] < min_cost) {
|
116
126
|
min_cost = cost[k];
|
@@ -126,20 +136,21 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
|
|
126
136
|
if (cost[k] >= block_switch_cost) {
|
127
137
|
const uint8_t mask = (uint8_t)(1u << (k & 7));
|
128
138
|
cost[k] = block_switch_cost;
|
129
|
-
BROTLI_DCHECK((k >> 3) <
|
139
|
+
BROTLI_DCHECK((k >> 3) < bitmap_len);
|
130
140
|
switch_signal[ix + (k >> 3)] |= mask;
|
131
141
|
}
|
132
142
|
}
|
133
143
|
}
|
144
|
+
|
145
|
+
byte_ix = length - 1;
|
134
146
|
{ /* Trace back from the last position and switch at the marked places. */
|
135
|
-
size_t
|
136
|
-
size_t ix = byte_ix * bitmaplen;
|
147
|
+
size_t ix = byte_ix * bitmap_len;
|
137
148
|
uint8_t cur_id = block_id[byte_ix];
|
138
149
|
while (byte_ix > 0) {
|
139
150
|
const uint8_t mask = (uint8_t)(1u << (cur_id & 7));
|
140
|
-
BROTLI_DCHECK(((size_t)cur_id >> 3) <
|
151
|
+
BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmap_len);
|
141
152
|
--byte_ix;
|
142
|
-
ix -=
|
153
|
+
ix -= bitmap_len;
|
143
154
|
if (switch_signal[ix + (cur_id >> 3)] & mask) {
|
144
155
|
if (cur_id != block_id[byte_ix]) {
|
145
156
|
cur_id = block_id[byte_ix];
|
@@ -185,13 +196,16 @@ static void FN(BuildBlockHistograms)(const DataType* data, const size_t length,
|
|
185
196
|
}
|
186
197
|
}
|
187
198
|
|
199
|
+
/* Given the initial partitioning build partitioning with limited number
|
200
|
+
* of histograms (and block types). */
|
188
201
|
static void FN(ClusterBlocks)(MemoryManager* m,
|
189
202
|
const DataType* data, const size_t length,
|
190
203
|
const size_t num_blocks,
|
191
204
|
uint8_t* block_ids,
|
192
205
|
BlockSplit* split) {
|
193
206
|
uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks);
|
194
|
-
uint32_t*
|
207
|
+
uint32_t* u32 =
|
208
|
+
BROTLI_ALLOC(m, uint32_t, num_blocks + 4 * HISTOGRAMS_PER_BATCH);
|
195
209
|
const size_t expected_num_clusters = CLUSTERS_PER_BATCH *
|
196
210
|
(num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH;
|
197
211
|
size_t all_histograms_size = 0;
|
@@ -214,15 +228,25 @@ static void FN(ClusterBlocks)(MemoryManager* m,
|
|
214
228
|
static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
|
215
229
|
uint32_t* new_index;
|
216
230
|
size_t i;
|
217
|
-
uint32_t sizes
|
218
|
-
uint32_t new_clusters
|
219
|
-
uint32_t symbols
|
220
|
-
uint32_t remap
|
231
|
+
uint32_t* BROTLI_RESTRICT const sizes = u32 + 0 * HISTOGRAMS_PER_BATCH;
|
232
|
+
uint32_t* BROTLI_RESTRICT const new_clusters = u32 + 1 * HISTOGRAMS_PER_BATCH;
|
233
|
+
uint32_t* BROTLI_RESTRICT const symbols = u32 + 2 * HISTOGRAMS_PER_BATCH;
|
234
|
+
uint32_t* BROTLI_RESTRICT const remap = u32 + 3 * HISTOGRAMS_PER_BATCH;
|
235
|
+
uint32_t* BROTLI_RESTRICT const block_lengths =
|
236
|
+
u32 + 4 * HISTOGRAMS_PER_BATCH;
|
237
|
+
/* TODO(eustas): move to arena? */
|
238
|
+
HistogramType* tmp = BROTLI_ALLOC(m, HistogramType, 2);
|
221
239
|
|
222
|
-
if (BROTLI_IS_OOM(m))
|
240
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histogram_symbols) ||
|
241
|
+
BROTLI_IS_NULL(u32) || BROTLI_IS_NULL(all_histograms) ||
|
242
|
+
BROTLI_IS_NULL(cluster_size) || BROTLI_IS_NULL(histograms) ||
|
243
|
+
BROTLI_IS_NULL(pairs) || BROTLI_IS_NULL(tmp)) {
|
244
|
+
return;
|
245
|
+
}
|
223
246
|
|
224
|
-
memset(
|
247
|
+
memset(u32, 0, (num_blocks + 4 * HISTOGRAMS_PER_BATCH) * sizeof(uint32_t));
|
225
248
|
|
249
|
+
/* Calculate block lengths (convert repeating values -> series length). */
|
226
250
|
{
|
227
251
|
size_t block_idx = 0;
|
228
252
|
for (i = 0; i < length; ++i) {
|
@@ -235,6 +259,7 @@ static void FN(ClusterBlocks)(MemoryManager* m,
|
|
235
259
|
BROTLI_DCHECK(block_idx == num_blocks);
|
236
260
|
}
|
237
261
|
|
262
|
+
/* Pre-cluster blocks (cluster batches). */
|
238
263
|
for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) {
|
239
264
|
const size_t num_to_combine =
|
240
265
|
BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH);
|
@@ -242,8 +267,9 @@ static void FN(ClusterBlocks)(MemoryManager* m,
|
|
242
267
|
size_t j;
|
243
268
|
for (j = 0; j < num_to_combine; ++j) {
|
244
269
|
size_t k;
|
270
|
+
size_t block_length = block_lengths[i + j];
|
245
271
|
FN(HistogramClear)(&histograms[j]);
|
246
|
-
for (k = 0; k <
|
272
|
+
for (k = 0; k < block_length; ++k) {
|
247
273
|
FN(HistogramAdd)(&histograms[j], data[pos++]);
|
248
274
|
}
|
249
275
|
histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]);
|
@@ -252,7 +278,7 @@ static void FN(ClusterBlocks)(MemoryManager* m,
|
|
252
278
|
sizes[j] = 1;
|
253
279
|
}
|
254
280
|
num_new_clusters = FN(BrotliHistogramCombine)(
|
255
|
-
histograms, sizes, symbols, new_clusters, pairs, num_to_combine,
|
281
|
+
histograms, tmp, sizes, symbols, new_clusters, pairs, num_to_combine,
|
256
282
|
num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs);
|
257
283
|
BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms,
|
258
284
|
all_histograms_capacity, all_histograms_size + num_new_clusters);
|
@@ -273,47 +299,49 @@ static void FN(ClusterBlocks)(MemoryManager* m,
|
|
273
299
|
}
|
274
300
|
BROTLI_FREE(m, histograms);
|
275
301
|
|
302
|
+
/* Final clustering. */
|
276
303
|
max_num_pairs =
|
277
304
|
BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters);
|
278
305
|
if (pairs_capacity < max_num_pairs + 1) {
|
279
306
|
BROTLI_FREE(m, pairs);
|
280
307
|
pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1);
|
281
|
-
if (BROTLI_IS_OOM(m)) return;
|
308
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(pairs)) return;
|
282
309
|
}
|
283
|
-
|
284
310
|
clusters = BROTLI_ALLOC(m, uint32_t, num_clusters);
|
285
|
-
if (BROTLI_IS_OOM(m)) return;
|
311
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(clusters)) return;
|
286
312
|
for (i = 0; i < num_clusters; ++i) {
|
287
313
|
clusters[i] = (uint32_t)i;
|
288
314
|
}
|
289
315
|
num_final_clusters = FN(BrotliHistogramCombine)(
|
290
|
-
all_histograms, cluster_size, histogram_symbols, clusters, pairs,
|
316
|
+
all_histograms, tmp, cluster_size, histogram_symbols, clusters, pairs,
|
291
317
|
num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES,
|
292
318
|
max_num_pairs);
|
293
319
|
BROTLI_FREE(m, pairs);
|
294
320
|
BROTLI_FREE(m, cluster_size);
|
295
321
|
|
322
|
+
/* Assign blocks to final histograms. */
|
296
323
|
new_index = BROTLI_ALLOC(m, uint32_t, num_clusters);
|
297
|
-
if (BROTLI_IS_OOM(m)) return;
|
324
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return;
|
298
325
|
for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex;
|
299
326
|
pos = 0;
|
300
327
|
{
|
301
328
|
uint32_t next_index = 0;
|
302
329
|
for (i = 0; i < num_blocks; ++i) {
|
303
|
-
HistogramType histo;
|
304
330
|
size_t j;
|
305
331
|
uint32_t best_out;
|
306
332
|
double best_bits;
|
307
|
-
FN(HistogramClear)(
|
333
|
+
FN(HistogramClear)(tmp);
|
308
334
|
for (j = 0; j < block_lengths[i]; ++j) {
|
309
|
-
FN(HistogramAdd)(
|
335
|
+
FN(HistogramAdd)(tmp, data[pos++]);
|
310
336
|
}
|
337
|
+
/* Among equally good histograms prefer last used. */
|
338
|
+
/* TODO(eustas): should we give a block-switch discount here? */
|
311
339
|
best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1];
|
312
|
-
best_bits =
|
313
|
-
|
340
|
+
best_bits = FN(BrotliHistogramBitCostDistance)(
|
341
|
+
tmp, &all_histograms[best_out], tmp + 1);
|
314
342
|
for (j = 0; j < num_final_clusters; ++j) {
|
315
343
|
const double cur_bits = FN(BrotliHistogramBitCostDistance)(
|
316
|
-
|
344
|
+
tmp, &all_histograms[clusters[j]], tmp + 1);
|
317
345
|
if (cur_bits < best_bits) {
|
318
346
|
best_bits = cur_bits;
|
319
347
|
best_out = clusters[j];
|
@@ -325,6 +353,7 @@ static void FN(ClusterBlocks)(MemoryManager* m,
|
|
325
353
|
}
|
326
354
|
}
|
327
355
|
}
|
356
|
+
BROTLI_FREE(m, tmp);
|
328
357
|
BROTLI_FREE(m, clusters);
|
329
358
|
BROTLI_FREE(m, all_histograms);
|
330
359
|
BROTLI_ENSURE_CAPACITY(
|
@@ -332,6 +361,9 @@ static void FN(ClusterBlocks)(MemoryManager* m,
|
|
332
361
|
BROTLI_ENSURE_CAPACITY(
|
333
362
|
m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks);
|
334
363
|
if (BROTLI_IS_OOM(m)) return;
|
364
|
+
|
365
|
+
/* Rewrite final assignment to block-split. There might be fewer blocks
|
366
|
+
* than |num_blocks| due to clustering. */
|
335
367
|
{
|
336
368
|
uint32_t cur_length = 0;
|
337
369
|
size_t block_idx = 0;
|
@@ -352,28 +384,41 @@ static void FN(ClusterBlocks)(MemoryManager* m,
|
|
352
384
|
split->num_types = (size_t)max_type + 1;
|
353
385
|
}
|
354
386
|
BROTLI_FREE(m, new_index);
|
355
|
-
BROTLI_FREE(m,
|
387
|
+
BROTLI_FREE(m, u32);
|
356
388
|
BROTLI_FREE(m, histogram_symbols);
|
357
389
|
}
|
358
390
|
|
391
|
+
/* Create BlockSplit (partitioning) given the limits, estimates and "effort"
|
392
|
+
* parameters.
|
393
|
+
*
|
394
|
+
* NB: max_histograms is often less than number of histograms allowed by format;
|
395
|
+
* this is done intentionally, to save some "space" for context-aware
|
396
|
+
* clustering (here entropy is estimated for context-free symbols). */
|
359
397
|
static void FN(SplitByteVector)(MemoryManager* m,
|
360
398
|
const DataType* data, const size_t length,
|
361
|
-
const size_t
|
399
|
+
const size_t symbols_per_histogram,
|
362
400
|
const size_t max_histograms,
|
363
401
|
const size_t sampling_stride_length,
|
364
402
|
const double block_switch_cost,
|
365
403
|
const BrotliEncoderParams* params,
|
366
404
|
BlockSplit* split) {
|
367
405
|
const size_t data_size = FN(HistogramDataSize)();
|
368
|
-
size_t num_histograms = length / literals_per_histogram + 1;
|
369
406
|
HistogramType* histograms;
|
407
|
+
HistogramType* tmp;
|
408
|
+
/* Calculate number of histograms; initial estimate is one histogram per
|
409
|
+
* specified amount of symbols; however, this value is capped. */
|
410
|
+
size_t num_histograms = length / symbols_per_histogram + 1;
|
370
411
|
if (num_histograms > max_histograms) {
|
371
412
|
num_histograms = max_histograms;
|
372
413
|
}
|
414
|
+
|
415
|
+
/* Corner case: no input. */
|
373
416
|
if (length == 0) {
|
374
417
|
split->num_types = 1;
|
375
418
|
return;
|
376
|
-
}
|
419
|
+
}
|
420
|
+
|
421
|
+
if (length < kMinLengthForBlockSplitting) {
|
377
422
|
BROTLI_ENSURE_CAPACITY(m, uint8_t,
|
378
423
|
split->types, split->types_alloc_size, split->num_blocks + 1);
|
379
424
|
BROTLI_ENSURE_CAPACITY(m, uint32_t,
|
@@ -385,15 +430,16 @@ static void FN(SplitByteVector)(MemoryManager* m,
|
|
385
430
|
split->num_blocks++;
|
386
431
|
return;
|
387
432
|
}
|
388
|
-
histograms = BROTLI_ALLOC(m, HistogramType, num_histograms);
|
389
|
-
|
433
|
+
histograms = BROTLI_ALLOC(m, HistogramType, num_histograms + 1);
|
434
|
+
tmp = histograms + num_histograms;
|
435
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histograms)) return;
|
390
436
|
/* Find good entropy codes. */
|
391
437
|
FN(InitialEntropyCodes)(data, length,
|
392
438
|
sampling_stride_length,
|
393
439
|
num_histograms, histograms);
|
394
440
|
FN(RefineEntropyCodes)(data, length,
|
395
441
|
sampling_stride_length,
|
396
|
-
num_histograms, histograms);
|
442
|
+
num_histograms, histograms, tmp);
|
397
443
|
{
|
398
444
|
/* Find a good path through literals with the good entropy codes. */
|
399
445
|
uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length);
|
@@ -405,7 +451,11 @@ static void FN(SplitByteVector)(MemoryManager* m,
|
|
405
451
|
uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms);
|
406
452
|
const size_t iters = params->quality < HQ_ZOPFLIFICATION_QUALITY ? 3 : 10;
|
407
453
|
size_t i;
|
408
|
-
if (BROTLI_IS_OOM(m))
|
454
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(block_ids) ||
|
455
|
+
BROTLI_IS_NULL(insert_cost) || BROTLI_IS_NULL(cost) ||
|
456
|
+
BROTLI_IS_NULL(switch_signal) || BROTLI_IS_NULL(new_id)) {
|
457
|
+
return;
|
458
|
+
}
|
409
459
|
for (i = 0; i < iters; ++i) {
|
410
460
|
num_blocks = FN(FindBlocks)(data, length,
|
411
461
|
block_switch_cost,
|