brotli 0.2.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/main.yml +37 -0
  3. data/.github/workflows/publish.yml +24 -0
  4. data/.gitmodules +1 -1
  5. data/Gemfile +6 -3
  6. data/README.md +2 -2
  7. data/Rakefile +16 -9
  8. data/brotli.gemspec +7 -13
  9. data/ext/brotli/brotli.c +210 -31
  10. data/ext/brotli/buffer.c +1 -7
  11. data/ext/brotli/buffer.h +1 -1
  12. data/ext/brotli/extconf.rb +25 -17
  13. data/lib/brotli/version.rb +1 -1
  14. data/test/brotli_test.rb +107 -0
  15. data/test/brotli_writer_test.rb +36 -0
  16. data/test/test_helper.rb +8 -0
  17. data/vendor/brotli/c/common/constants.c +15 -0
  18. data/vendor/brotli/c/common/constants.h +137 -0
  19. data/vendor/brotli/c/common/context.c +156 -0
  20. data/vendor/brotli/c/common/context.h +4 -152
  21. data/vendor/brotli/c/common/dictionary.bin.br +0 -0
  22. data/vendor/brotli/c/common/dictionary.c +14 -3
  23. data/vendor/brotli/c/common/platform.c +23 -0
  24. data/vendor/brotli/c/common/platform.h +95 -122
  25. data/vendor/brotli/c/common/shared_dictionary.c +521 -0
  26. data/vendor/brotli/c/common/shared_dictionary_internal.h +75 -0
  27. data/vendor/brotli/c/common/transform.c +60 -4
  28. data/vendor/brotli/c/common/transform.h +5 -0
  29. data/vendor/brotli/c/common/version.h +31 -6
  30. data/vendor/brotli/c/dec/bit_reader.c +34 -4
  31. data/vendor/brotli/c/dec/bit_reader.h +221 -107
  32. data/vendor/brotli/c/dec/decode.c +772 -403
  33. data/vendor/brotli/c/dec/huffman.c +7 -4
  34. data/vendor/brotli/c/dec/huffman.h +8 -13
  35. data/vendor/brotli/c/dec/prefix.h +1 -18
  36. data/vendor/brotli/c/dec/state.c +40 -21
  37. data/vendor/brotli/c/dec/state.h +201 -59
  38. data/vendor/brotli/c/enc/backward_references.c +88 -25
  39. data/vendor/brotli/c/enc/backward_references.h +10 -8
  40. data/vendor/brotli/c/enc/backward_references_hq.c +194 -80
  41. data/vendor/brotli/c/enc/backward_references_hq.h +17 -13
  42. data/vendor/brotli/c/enc/backward_references_inc.h +52 -16
  43. data/vendor/brotli/c/enc/bit_cost.c +8 -7
  44. data/vendor/brotli/c/enc/bit_cost.h +5 -4
  45. data/vendor/brotli/c/enc/block_splitter.c +40 -17
  46. data/vendor/brotli/c/enc/block_splitter.h +5 -4
  47. data/vendor/brotli/c/enc/block_splitter_inc.h +99 -49
  48. data/vendor/brotli/c/enc/brotli_bit_stream.c +142 -137
  49. data/vendor/brotli/c/enc/brotli_bit_stream.h +11 -6
  50. data/vendor/brotli/c/enc/cluster.c +10 -9
  51. data/vendor/brotli/c/enc/cluster.h +7 -6
  52. data/vendor/brotli/c/enc/cluster_inc.h +30 -22
  53. data/vendor/brotli/c/enc/command.c +28 -0
  54. data/vendor/brotli/c/enc/command.h +17 -16
  55. data/vendor/brotli/c/enc/compound_dictionary.c +207 -0
  56. data/vendor/brotli/c/enc/compound_dictionary.h +74 -0
  57. data/vendor/brotli/c/enc/compress_fragment.c +93 -83
  58. data/vendor/brotli/c/enc/compress_fragment.h +32 -7
  59. data/vendor/brotli/c/enc/compress_fragment_two_pass.c +100 -88
  60. data/vendor/brotli/c/enc/compress_fragment_two_pass.h +21 -3
  61. data/vendor/brotli/c/enc/dictionary_hash.c +1829 -1101
  62. data/vendor/brotli/c/enc/dictionary_hash.h +2 -1
  63. data/vendor/brotli/c/enc/encode.c +550 -416
  64. data/vendor/brotli/c/enc/encoder_dict.c +613 -5
  65. data/vendor/brotli/c/enc/encoder_dict.h +120 -4
  66. data/vendor/brotli/c/enc/entropy_encode.c +5 -2
  67. data/vendor/brotli/c/enc/entropy_encode.h +4 -3
  68. data/vendor/brotli/c/enc/entropy_encode_static.h +5 -2
  69. data/vendor/brotli/c/enc/fast_log.c +105 -0
  70. data/vendor/brotli/c/enc/fast_log.h +21 -101
  71. data/vendor/brotli/c/enc/find_match_length.h +17 -25
  72. data/vendor/brotli/c/enc/hash.h +350 -120
  73. data/vendor/brotli/c/enc/hash_composite_inc.h +71 -67
  74. data/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +92 -51
  75. data/vendor/brotli/c/enc/hash_longest_match64_inc.h +79 -84
  76. data/vendor/brotli/c/enc/hash_longest_match_inc.h +53 -54
  77. data/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +93 -62
  78. data/vendor/brotli/c/enc/hash_rolling_inc.h +25 -29
  79. data/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +42 -40
  80. data/vendor/brotli/c/enc/histogram.c +4 -4
  81. data/vendor/brotli/c/enc/histogram.h +7 -6
  82. data/vendor/brotli/c/enc/literal_cost.c +20 -15
  83. data/vendor/brotli/c/enc/literal_cost.h +4 -2
  84. data/vendor/brotli/c/enc/memory.c +29 -5
  85. data/vendor/brotli/c/enc/memory.h +43 -14
  86. data/vendor/brotli/c/enc/metablock.c +95 -85
  87. data/vendor/brotli/c/enc/metablock.h +9 -8
  88. data/vendor/brotli/c/enc/metablock_inc.h +9 -7
  89. data/vendor/brotli/c/enc/params.h +7 -4
  90. data/vendor/brotli/c/enc/prefix.h +3 -2
  91. data/vendor/brotli/c/enc/quality.h +40 -3
  92. data/vendor/brotli/c/enc/ringbuffer.h +8 -4
  93. data/vendor/brotli/c/enc/state.h +104 -0
  94. data/vendor/brotli/c/enc/static_dict.c +60 -4
  95. data/vendor/brotli/c/enc/static_dict.h +3 -2
  96. data/vendor/brotli/c/enc/static_dict_lut.h +2 -0
  97. data/vendor/brotli/c/enc/utf8_util.c +2 -2
  98. data/vendor/brotli/c/enc/utf8_util.h +2 -1
  99. data/vendor/brotli/c/enc/write_bits.h +29 -26
  100. data/vendor/brotli/c/include/brotli/decode.h +67 -2
  101. data/vendor/brotli/c/include/brotli/encode.h +77 -3
  102. data/vendor/brotli/c/include/brotli/port.h +34 -3
  103. data/vendor/brotli/c/include/brotli/shared_dictionary.h +100 -0
  104. metadata +23 -97
  105. data/.travis.yml +0 -31
  106. data/docs/Brotli/Error.html +0 -124
  107. data/docs/Brotli.html +0 -485
  108. data/docs/_index.html +0 -122
  109. data/docs/class_list.html +0 -51
  110. data/docs/css/common.css +0 -1
  111. data/docs/css/full_list.css +0 -58
  112. data/docs/css/style.css +0 -496
  113. data/docs/file.README.html +0 -127
  114. data/docs/file_list.html +0 -56
  115. data/docs/frames.html +0 -17
  116. data/docs/index.html +0 -127
  117. data/docs/js/app.js +0 -292
  118. data/docs/js/full_list.js +0 -216
  119. data/docs/js/jquery.js +0 -4
  120. data/docs/method_list.html +0 -67
  121. data/docs/top-level-namespace.html +0 -110
  122. data/spec/brotli_spec.rb +0 -88
  123. data/spec/inflate_spec.rb +0 -75
  124. data/spec/spec_helper.rb +0 -4
@@ -46,17 +46,17 @@ static void FN(RandomSample)(uint32_t* seed,
46
46
  static void FN(RefineEntropyCodes)(const DataType* data, size_t length,
47
47
  size_t stride,
48
48
  size_t num_histograms,
49
- HistogramType* histograms) {
49
+ HistogramType* histograms,
50
+ HistogramType* tmp) {
50
51
  size_t iters =
51
52
  kIterMulForRefining * length / stride + kMinItersForRefining;
52
53
  uint32_t seed = 7;
53
54
  size_t iter;
54
55
  iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
55
56
  for (iter = 0; iter < iters; ++iter) {
56
- HistogramType sample;
57
- FN(HistogramClear)(&sample);
58
- FN(RandomSample)(&seed, data, length, stride, &sample);
59
- FN(HistogramAddHistogram)(&histograms[iter % num_histograms], &sample);
57
+ FN(HistogramClear)(tmp);
58
+ FN(RandomSample)(&seed, data, length, stride, tmp);
59
+ FN(HistogramAddHistogram)(&histograms[iter % num_histograms], tmp);
60
60
  }
61
61
  }
62
62
 
@@ -71,46 +71,56 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
71
71
  double* cost,
72
72
  uint8_t* switch_signal,
73
73
  uint8_t* block_id) {
74
- const size_t data_size = FN(HistogramDataSize)();
75
- const size_t bitmaplen = (num_histograms + 7) >> 3;
74
+ const size_t alphabet_size = FN(HistogramDataSize)();
75
+ const size_t bitmap_len = (num_histograms + 7) >> 3;
76
76
  size_t num_blocks = 1;
77
+ size_t byte_ix;
77
78
  size_t i;
78
79
  size_t j;
79
80
  BROTLI_DCHECK(num_histograms <= 256);
81
+
82
+ /* Trivial case: single histogram -> single block type. */
80
83
  if (num_histograms <= 1) {
81
84
  for (i = 0; i < length; ++i) {
82
85
  block_id[i] = 0;
83
86
  }
84
87
  return 1;
85
88
  }
86
- memset(insert_cost, 0, sizeof(insert_cost[0]) * data_size * num_histograms);
89
+
90
+ /* Fill bitcost for each symbol of all histograms.
91
+ * Non-existing symbol cost: 2 + log2(total_count).
92
+ * Regular symbol cost: -log2(symbol_count / total_count). */
93
+ memset(insert_cost, 0,
94
+ sizeof(insert_cost[0]) * alphabet_size * num_histograms);
87
95
  for (i = 0; i < num_histograms; ++i) {
88
96
  insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_);
89
97
  }
90
- for (i = data_size; i != 0;) {
98
+ for (i = alphabet_size; i != 0;) {
99
+ /* Reverse order to use the 0-th row as a temporary storage. */
91
100
  --i;
92
101
  for (j = 0; j < num_histograms; ++j) {
93
102
  insert_cost[i * num_histograms + j] =
94
103
  insert_cost[j] - BitCost(histograms[j].data_[i]);
95
104
  }
96
105
  }
97
- memset(cost, 0, sizeof(cost[0]) * num_histograms);
98
- memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
106
+
99
107
  /* After each iteration of this loop, cost[k] will contain the difference
100
108
  between the minimum cost of arriving at the current byte position using
101
109
  entropy code k, and the minimum cost of arriving at the current byte
102
110
  position. This difference is capped at the block switch cost, and if it
103
111
  reaches block switch cost, it means that when we trace back from the last
104
112
  position, we need to switch here. */
105
- for (i = 0; i < length; ++i) {
106
- const size_t byte_ix = i;
107
- size_t ix = byte_ix * bitmaplen;
108
- size_t insert_cost_ix = data[byte_ix] * num_histograms;
113
+ memset(cost, 0, sizeof(cost[0]) * num_histograms);
114
+ memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmap_len);
115
+ for (byte_ix = 0; byte_ix < length; ++byte_ix) {
116
+ size_t ix = byte_ix * bitmap_len;
117
+ size_t symbol = data[byte_ix];
118
+ size_t insert_cost_ix = symbol * num_histograms;
109
119
  double min_cost = 1e99;
110
120
  double block_switch_cost = block_switch_bitcost;
111
121
  size_t k;
112
122
  for (k = 0; k < num_histograms; ++k) {
113
- /* We are coding the symbol in data[byte_ix] with entropy code k. */
123
+ /* We are coding the symbol with entropy code k. */
114
124
  cost[k] += insert_cost[insert_cost_ix + k];
115
125
  if (cost[k] < min_cost) {
116
126
  min_cost = cost[k];
@@ -126,20 +136,21 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
126
136
  if (cost[k] >= block_switch_cost) {
127
137
  const uint8_t mask = (uint8_t)(1u << (k & 7));
128
138
  cost[k] = block_switch_cost;
129
- BROTLI_DCHECK((k >> 3) < bitmaplen);
139
+ BROTLI_DCHECK((k >> 3) < bitmap_len);
130
140
  switch_signal[ix + (k >> 3)] |= mask;
131
141
  }
132
142
  }
133
143
  }
144
+
145
+ byte_ix = length - 1;
134
146
  { /* Trace back from the last position and switch at the marked places. */
135
- size_t byte_ix = length - 1;
136
- size_t ix = byte_ix * bitmaplen;
147
+ size_t ix = byte_ix * bitmap_len;
137
148
  uint8_t cur_id = block_id[byte_ix];
138
149
  while (byte_ix > 0) {
139
150
  const uint8_t mask = (uint8_t)(1u << (cur_id & 7));
140
- BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmaplen);
151
+ BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmap_len);
141
152
  --byte_ix;
142
- ix -= bitmaplen;
153
+ ix -= bitmap_len;
143
154
  if (switch_signal[ix + (cur_id >> 3)] & mask) {
144
155
  if (cur_id != block_id[byte_ix]) {
145
156
  cur_id = block_id[byte_ix];
@@ -185,13 +196,16 @@ static void FN(BuildBlockHistograms)(const DataType* data, const size_t length,
185
196
  }
186
197
  }
187
198
 
199
+ /* Given the initial partitioning, build a partitioning with a limited number
200
+ * of histograms (and block types). */
188
201
  static void FN(ClusterBlocks)(MemoryManager* m,
189
202
  const DataType* data, const size_t length,
190
203
  const size_t num_blocks,
191
204
  uint8_t* block_ids,
192
205
  BlockSplit* split) {
193
206
  uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks);
194
- uint32_t* block_lengths = BROTLI_ALLOC(m, uint32_t, num_blocks);
207
+ uint32_t* u32 =
208
+ BROTLI_ALLOC(m, uint32_t, num_blocks + 4 * HISTOGRAMS_PER_BATCH);
195
209
  const size_t expected_num_clusters = CLUSTERS_PER_BATCH *
196
210
  (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH;
197
211
  size_t all_histograms_size = 0;
@@ -214,15 +228,25 @@ static void FN(ClusterBlocks)(MemoryManager* m,
214
228
  static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
215
229
  uint32_t* new_index;
216
230
  size_t i;
217
- uint32_t sizes[HISTOGRAMS_PER_BATCH] = { 0 };
218
- uint32_t new_clusters[HISTOGRAMS_PER_BATCH] = { 0 };
219
- uint32_t symbols[HISTOGRAMS_PER_BATCH] = { 0 };
220
- uint32_t remap[HISTOGRAMS_PER_BATCH] = { 0 };
231
+ uint32_t* BROTLI_RESTRICT const sizes = u32 + 0 * HISTOGRAMS_PER_BATCH;
232
+ uint32_t* BROTLI_RESTRICT const new_clusters = u32 + 1 * HISTOGRAMS_PER_BATCH;
233
+ uint32_t* BROTLI_RESTRICT const symbols = u32 + 2 * HISTOGRAMS_PER_BATCH;
234
+ uint32_t* BROTLI_RESTRICT const remap = u32 + 3 * HISTOGRAMS_PER_BATCH;
235
+ uint32_t* BROTLI_RESTRICT const block_lengths =
236
+ u32 + 4 * HISTOGRAMS_PER_BATCH;
237
+ /* TODO(eustas): move to arena? */
238
+ HistogramType* tmp = BROTLI_ALLOC(m, HistogramType, 2);
221
239
 
222
- if (BROTLI_IS_OOM(m)) return;
240
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histogram_symbols) ||
241
+ BROTLI_IS_NULL(u32) || BROTLI_IS_NULL(all_histograms) ||
242
+ BROTLI_IS_NULL(cluster_size) || BROTLI_IS_NULL(histograms) ||
243
+ BROTLI_IS_NULL(pairs) || BROTLI_IS_NULL(tmp)) {
244
+ return;
245
+ }
223
246
 
224
- memset(block_lengths, 0, num_blocks * sizeof(uint32_t));
247
+ memset(u32, 0, (num_blocks + 4 * HISTOGRAMS_PER_BATCH) * sizeof(uint32_t));
225
248
 
249
+ /* Calculate block lengths (convert repeating values -> series length). */
226
250
  {
227
251
  size_t block_idx = 0;
228
252
  for (i = 0; i < length; ++i) {
@@ -235,6 +259,7 @@ static void FN(ClusterBlocks)(MemoryManager* m,
235
259
  BROTLI_DCHECK(block_idx == num_blocks);
236
260
  }
237
261
 
262
+ /* Pre-cluster blocks (cluster batches). */
238
263
  for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) {
239
264
  const size_t num_to_combine =
240
265
  BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH);
@@ -242,8 +267,9 @@ static void FN(ClusterBlocks)(MemoryManager* m,
242
267
  size_t j;
243
268
  for (j = 0; j < num_to_combine; ++j) {
244
269
  size_t k;
270
+ size_t block_length = block_lengths[i + j];
245
271
  FN(HistogramClear)(&histograms[j]);
246
- for (k = 0; k < block_lengths[i + j]; ++k) {
272
+ for (k = 0; k < block_length; ++k) {
247
273
  FN(HistogramAdd)(&histograms[j], data[pos++]);
248
274
  }
249
275
  histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]);
@@ -252,7 +278,7 @@ static void FN(ClusterBlocks)(MemoryManager* m,
252
278
  sizes[j] = 1;
253
279
  }
254
280
  num_new_clusters = FN(BrotliHistogramCombine)(
255
- histograms, sizes, symbols, new_clusters, pairs, num_to_combine,
281
+ histograms, tmp, sizes, symbols, new_clusters, pairs, num_to_combine,
256
282
  num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs);
257
283
  BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms,
258
284
  all_histograms_capacity, all_histograms_size + num_new_clusters);
@@ -273,47 +299,49 @@ static void FN(ClusterBlocks)(MemoryManager* m,
273
299
  }
274
300
  BROTLI_FREE(m, histograms);
275
301
 
302
+ /* Final clustering. */
276
303
  max_num_pairs =
277
304
  BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters);
278
305
  if (pairs_capacity < max_num_pairs + 1) {
279
306
  BROTLI_FREE(m, pairs);
280
307
  pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1);
281
- if (BROTLI_IS_OOM(m)) return;
308
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(pairs)) return;
282
309
  }
283
-
284
310
  clusters = BROTLI_ALLOC(m, uint32_t, num_clusters);
285
- if (BROTLI_IS_OOM(m)) return;
311
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(clusters)) return;
286
312
  for (i = 0; i < num_clusters; ++i) {
287
313
  clusters[i] = (uint32_t)i;
288
314
  }
289
315
  num_final_clusters = FN(BrotliHistogramCombine)(
290
- all_histograms, cluster_size, histogram_symbols, clusters, pairs,
316
+ all_histograms, tmp, cluster_size, histogram_symbols, clusters, pairs,
291
317
  num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES,
292
318
  max_num_pairs);
293
319
  BROTLI_FREE(m, pairs);
294
320
  BROTLI_FREE(m, cluster_size);
295
321
 
322
+ /* Assign blocks to final histograms. */
296
323
  new_index = BROTLI_ALLOC(m, uint32_t, num_clusters);
297
- if (BROTLI_IS_OOM(m)) return;
324
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return;
298
325
  for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex;
299
326
  pos = 0;
300
327
  {
301
328
  uint32_t next_index = 0;
302
329
  for (i = 0; i < num_blocks; ++i) {
303
- HistogramType histo;
304
330
  size_t j;
305
331
  uint32_t best_out;
306
332
  double best_bits;
307
- FN(HistogramClear)(&histo);
333
+ FN(HistogramClear)(tmp);
308
334
  for (j = 0; j < block_lengths[i]; ++j) {
309
- FN(HistogramAdd)(&histo, data[pos++]);
335
+ FN(HistogramAdd)(tmp, data[pos++]);
310
336
  }
337
+ /* Among equally good histograms prefer last used. */
338
+ /* TODO(eustas): should we give a block-switch discount here? */
311
339
  best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1];
312
- best_bits =
313
- FN(BrotliHistogramBitCostDistance)(&histo, &all_histograms[best_out]);
340
+ best_bits = FN(BrotliHistogramBitCostDistance)(
341
+ tmp, &all_histograms[best_out], tmp + 1);
314
342
  for (j = 0; j < num_final_clusters; ++j) {
315
343
  const double cur_bits = FN(BrotliHistogramBitCostDistance)(
316
- &histo, &all_histograms[clusters[j]]);
344
+ tmp, &all_histograms[clusters[j]], tmp + 1);
317
345
  if (cur_bits < best_bits) {
318
346
  best_bits = cur_bits;
319
347
  best_out = clusters[j];
@@ -325,6 +353,7 @@ static void FN(ClusterBlocks)(MemoryManager* m,
325
353
  }
326
354
  }
327
355
  }
356
+ BROTLI_FREE(m, tmp);
328
357
  BROTLI_FREE(m, clusters);
329
358
  BROTLI_FREE(m, all_histograms);
330
359
  BROTLI_ENSURE_CAPACITY(
@@ -332,6 +361,9 @@ static void FN(ClusterBlocks)(MemoryManager* m,
332
361
  BROTLI_ENSURE_CAPACITY(
333
362
  m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks);
334
363
  if (BROTLI_IS_OOM(m)) return;
364
+
365
+ /* Rewrite final assignment to block-split. There might be fewer blocks
366
+ * than |num_blocks| due to clustering. */
335
367
  {
336
368
  uint32_t cur_length = 0;
337
369
  size_t block_idx = 0;
@@ -352,28 +384,41 @@ static void FN(ClusterBlocks)(MemoryManager* m,
352
384
  split->num_types = (size_t)max_type + 1;
353
385
  }
354
386
  BROTLI_FREE(m, new_index);
355
- BROTLI_FREE(m, block_lengths);
387
+ BROTLI_FREE(m, u32);
356
388
  BROTLI_FREE(m, histogram_symbols);
357
389
  }
358
390
 
391
+ /* Create BlockSplit (partitioning) given the limits, estimates and "effort"
392
+ * parameters.
393
+ *
394
+ * NB: max_histograms is often less than the number of histograms allowed by the format;
395
+ * this is done intentionally, to save some "space" for context-aware
396
+ * clustering (here entropy is estimated for context-free symbols). */
359
397
  static void FN(SplitByteVector)(MemoryManager* m,
360
398
  const DataType* data, const size_t length,
361
- const size_t literals_per_histogram,
399
+ const size_t symbols_per_histogram,
362
400
  const size_t max_histograms,
363
401
  const size_t sampling_stride_length,
364
402
  const double block_switch_cost,
365
403
  const BrotliEncoderParams* params,
366
404
  BlockSplit* split) {
367
405
  const size_t data_size = FN(HistogramDataSize)();
368
- size_t num_histograms = length / literals_per_histogram + 1;
369
406
  HistogramType* histograms;
407
+ HistogramType* tmp;
408
+ /* Calculate number of histograms; initial estimate is one histogram per
409
+ * specified amount of symbols; however, this value is capped. */
410
+ size_t num_histograms = length / symbols_per_histogram + 1;
370
411
  if (num_histograms > max_histograms) {
371
412
  num_histograms = max_histograms;
372
413
  }
414
+
415
+ /* Corner case: no input. */
373
416
  if (length == 0) {
374
417
  split->num_types = 1;
375
418
  return;
376
- } else if (length < kMinLengthForBlockSplitting) {
419
+ }
420
+
421
+ if (length < kMinLengthForBlockSplitting) {
377
422
  BROTLI_ENSURE_CAPACITY(m, uint8_t,
378
423
  split->types, split->types_alloc_size, split->num_blocks + 1);
379
424
  BROTLI_ENSURE_CAPACITY(m, uint32_t,
@@ -385,15 +430,16 @@ static void FN(SplitByteVector)(MemoryManager* m,
385
430
  split->num_blocks++;
386
431
  return;
387
432
  }
388
- histograms = BROTLI_ALLOC(m, HistogramType, num_histograms);
389
- if (BROTLI_IS_OOM(m)) return;
433
+ histograms = BROTLI_ALLOC(m, HistogramType, num_histograms + 1);
434
+ tmp = histograms + num_histograms;
435
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histograms)) return;
390
436
  /* Find good entropy codes. */
391
437
  FN(InitialEntropyCodes)(data, length,
392
438
  sampling_stride_length,
393
439
  num_histograms, histograms);
394
440
  FN(RefineEntropyCodes)(data, length,
395
441
  sampling_stride_length,
396
- num_histograms, histograms);
442
+ num_histograms, histograms, tmp);
397
443
  {
398
444
  /* Find a good path through literals with the good entropy codes. */
399
445
  uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length);
@@ -405,7 +451,11 @@ static void FN(SplitByteVector)(MemoryManager* m,
405
451
  uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms);
406
452
  const size_t iters = params->quality < HQ_ZOPFLIFICATION_QUALITY ? 3 : 10;
407
453
  size_t i;
408
- if (BROTLI_IS_OOM(m)) return;
454
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(block_ids) ||
455
+ BROTLI_IS_NULL(insert_cost) || BROTLI_IS_NULL(cost) ||
456
+ BROTLI_IS_NULL(switch_signal) || BROTLI_IS_NULL(new_id)) {
457
+ return;
458
+ }
409
459
  for (i = 0; i < iters; ++i) {
410
460
  num_blocks = FN(FindBlocks)(data, length,
411
461
  block_switch_cost,