isomorfeus-ferret 0.12.6 → 0.12.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (97) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -4
  3. data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
  4. data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
  5. data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
  6. data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
  7. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
  8. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
  9. data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
  10. data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
  11. data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
  12. data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
  13. data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
  14. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
  15. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
  16. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
  17. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
  18. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
  19. data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
  20. data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
  21. data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
  22. data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
  23. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
  24. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
  25. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
  26. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
  27. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
  28. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
  29. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
  30. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
  31. data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
  32. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
  33. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
  34. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
  35. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
  36. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
  37. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
  38. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
  39. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
  40. data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
  41. data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
  42. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
  43. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
  44. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
  45. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
  46. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
  47. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
  48. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
  49. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
  50. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
  51. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
  52. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
  53. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
  54. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
  55. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
  56. data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
  57. data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
  58. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
  59. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
  60. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
  61. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
  62. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
  63. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
  64. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
  65. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
  66. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
  67. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
  68. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
  69. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
  70. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
  71. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
  72. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
  73. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
  74. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
  75. data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
  76. data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
  77. data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
  78. data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
  79. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
  80. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
  81. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
  82. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
  83. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
  84. data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
  85. data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
  86. data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
  87. data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
  88. data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
  89. data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
  90. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  91. data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
  92. data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
  93. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
  94. data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
  95. data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
  96. data/lib/isomorfeus/ferret/version.rb +1 -1
  97. metadata +88 -3
@@ -0,0 +1,663 @@
1
+ /* Copyright 2015 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Algorithms for distributing the literals and commands of a metablock between
8
+ block types and contexts. */
9
+
10
+ #include "brotli_enc_metablock.h"
11
+
12
+ #include "brotli_common_constants.h"
13
+ #include "brotli_common_context.h"
14
+ #include "brotli_common_platform.h"
15
+ #include "brotli_types.h"
16
+ #include "brotli_enc_bit_cost.h"
17
+ #include "brotli_enc_block_splitter.h"
18
+ #include "brotli_enc_cluster.h"
19
+ #include "brotli_enc_entropy_encode.h"
20
+ #include "brotli_enc_histogram.h"
21
+ #include "brotli_enc_memory.h"
22
+ #include "brotli_enc_quality.h"
23
+
24
+ #if defined(__cplusplus) || defined(c_plusplus)
25
+ extern "C" {
26
+ #endif
27
+
28
+ void BrotliInitDistanceParams(BrotliEncoderParams* params,
29
+ uint32_t npostfix, uint32_t ndirect) {
30
+ BrotliDistanceParams* dist_params = &params->dist;
31
+ uint32_t alphabet_size_max;
32
+ uint32_t alphabet_size_limit;
33
+ uint32_t max_distance;
34
+
35
+ dist_params->distance_postfix_bits = npostfix;
36
+ dist_params->num_direct_distance_codes = ndirect;
37
+
38
+ alphabet_size_max = BROTLI_DISTANCE_ALPHABET_SIZE(
39
+ npostfix, ndirect, BROTLI_MAX_DISTANCE_BITS);
40
+ alphabet_size_limit = alphabet_size_max;
41
+ max_distance = ndirect + (1U << (BROTLI_MAX_DISTANCE_BITS + npostfix + 2)) -
42
+ (1U << (npostfix + 2));
43
+
44
+ if (params->large_window) {
45
+ BrotliDistanceCodeLimit limit = BrotliCalculateDistanceCodeLimit(
46
+ BROTLI_MAX_ALLOWED_DISTANCE, npostfix, ndirect);
47
+ alphabet_size_max = BROTLI_DISTANCE_ALPHABET_SIZE(
48
+ npostfix, ndirect, BROTLI_LARGE_MAX_DISTANCE_BITS);
49
+ alphabet_size_limit = limit.max_alphabet_size;
50
+ max_distance = limit.max_distance;
51
+ }
52
+
53
+ dist_params->alphabet_size_max = alphabet_size_max;
54
+ dist_params->alphabet_size_limit = alphabet_size_limit;
55
+ dist_params->max_distance = max_distance;
56
+ }
57
+
58
+ static void RecomputeDistancePrefixes(Command* cmds,
59
+ size_t num_commands,
60
+ const BrotliDistanceParams* orig_params,
61
+ const BrotliDistanceParams* new_params) {
62
+ size_t i;
63
+
64
+ if (orig_params->distance_postfix_bits == new_params->distance_postfix_bits &&
65
+ orig_params->num_direct_distance_codes ==
66
+ new_params->num_direct_distance_codes) {
67
+ return;
68
+ }
69
+
70
+ for (i = 0; i < num_commands; ++i) {
71
+ Command* cmd = &cmds[i];
72
+ if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
73
+ PrefixEncodeCopyDistance(CommandRestoreDistanceCode(cmd, orig_params),
74
+ new_params->num_direct_distance_codes,
75
+ new_params->distance_postfix_bits,
76
+ &cmd->dist_prefix_,
77
+ &cmd->dist_extra_);
78
+ }
79
+ }
80
+ }
81
+
82
+ static BROTLI_BOOL ComputeDistanceCost(const Command* cmds,
83
+ size_t num_commands,
84
+ const BrotliDistanceParams* orig_params,
85
+ const BrotliDistanceParams* new_params,
86
+ double* cost) {
87
+ size_t i;
88
+ BROTLI_BOOL equal_params = BROTLI_FALSE;
89
+ uint16_t dist_prefix;
90
+ uint32_t dist_extra;
91
+ double extra_bits = 0.0;
92
+ HistogramDistance histo;
93
+ HistogramClearDistance(&histo);
94
+
95
+ if (orig_params->distance_postfix_bits == new_params->distance_postfix_bits &&
96
+ orig_params->num_direct_distance_codes ==
97
+ new_params->num_direct_distance_codes) {
98
+ equal_params = BROTLI_TRUE;
99
+ }
100
+
101
+ for (i = 0; i < num_commands; i++) {
102
+ const Command* cmd = &cmds[i];
103
+ if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
104
+ if (equal_params) {
105
+ dist_prefix = cmd->dist_prefix_;
106
+ } else {
107
+ uint32_t distance = CommandRestoreDistanceCode(cmd, orig_params);
108
+ if (distance > new_params->max_distance) {
109
+ return BROTLI_FALSE;
110
+ }
111
+ PrefixEncodeCopyDistance(distance,
112
+ new_params->num_direct_distance_codes,
113
+ new_params->distance_postfix_bits,
114
+ &dist_prefix,
115
+ &dist_extra);
116
+ }
117
+ HistogramAddDistance(&histo, dist_prefix & 0x3FF);
118
+ extra_bits += dist_prefix >> 10;
119
+ }
120
+ }
121
+
122
+ *cost = BrotliPopulationCostDistance(&histo) + extra_bits;
123
+ return BROTLI_TRUE;
124
+ }
125
+
126
+ void BrotliBuildMetaBlock(MemoryManager* m,
127
+ const uint8_t* ringbuffer,
128
+ const size_t pos,
129
+ const size_t mask,
130
+ BrotliEncoderParams* params,
131
+ uint8_t prev_byte,
132
+ uint8_t prev_byte2,
133
+ Command* cmds,
134
+ size_t num_commands,
135
+ ContextType literal_context_mode,
136
+ MetaBlockSplit* mb) {
137
+ /* Histogram ids need to fit in one byte. */
138
+ static const size_t kMaxNumberOfHistograms = 256;
139
+ HistogramDistance* distance_histograms;
140
+ HistogramLiteral* literal_histograms;
141
+ ContextType* literal_context_modes = NULL;
142
+ size_t literal_histograms_size;
143
+ size_t distance_histograms_size;
144
+ size_t i;
145
+ size_t literal_context_multiplier = 1;
146
+ uint32_t npostfix;
147
+ uint32_t ndirect_msb = 0;
148
+ BROTLI_BOOL check_orig = BROTLI_TRUE;
149
+ double best_dist_cost = 1e99;
150
+ BrotliEncoderParams orig_params = *params;
151
+ BrotliEncoderParams new_params = *params;
152
+
153
+ for (npostfix = 0; npostfix <= BROTLI_MAX_NPOSTFIX; npostfix++) {
154
+ for (; ndirect_msb < 16; ndirect_msb++) {
155
+ uint32_t ndirect = ndirect_msb << npostfix;
156
+ BROTLI_BOOL skip;
157
+ double dist_cost;
158
+ BrotliInitDistanceParams(&new_params, npostfix, ndirect);
159
+ if (npostfix == orig_params.dist.distance_postfix_bits &&
160
+ ndirect == orig_params.dist.num_direct_distance_codes) {
161
+ check_orig = BROTLI_FALSE;
162
+ }
163
+ skip = !ComputeDistanceCost(
164
+ cmds, num_commands,
165
+ &orig_params.dist, &new_params.dist, &dist_cost);
166
+ if (skip || (dist_cost > best_dist_cost)) {
167
+ break;
168
+ }
169
+ best_dist_cost = dist_cost;
170
+ params->dist = new_params.dist;
171
+ }
172
+ if (ndirect_msb > 0) ndirect_msb--;
173
+ ndirect_msb /= 2;
174
+ }
175
+ if (check_orig) {
176
+ double dist_cost;
177
+ ComputeDistanceCost(cmds, num_commands,
178
+ &orig_params.dist, &orig_params.dist, &dist_cost);
179
+ if (dist_cost < best_dist_cost) {
180
+ /* NB: currently unused; uncomment when more param tuning is added. */
181
+ /* best_dist_cost = dist_cost; */
182
+ params->dist = orig_params.dist;
183
+ }
184
+ }
185
+ RecomputeDistancePrefixes(cmds, num_commands,
186
+ &orig_params.dist, &params->dist);
187
+
188
+ BrotliSplitBlock(m, cmds, num_commands,
189
+ ringbuffer, pos, mask, params,
190
+ &mb->literal_split,
191
+ &mb->command_split,
192
+ &mb->distance_split);
193
+ if (BROTLI_IS_OOM(m)) return;
194
+
195
+ if (!params->disable_literal_context_modeling) {
196
+ literal_context_multiplier = 1 << BROTLI_LITERAL_CONTEXT_BITS;
197
+ literal_context_modes =
198
+ BROTLI_ALLOC(m, ContextType, mb->literal_split.num_types);
199
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literal_context_modes)) return;
200
+ for (i = 0; i < mb->literal_split.num_types; ++i) {
201
+ literal_context_modes[i] = literal_context_mode;
202
+ }
203
+ }
204
+
205
+ literal_histograms_size =
206
+ mb->literal_split.num_types * literal_context_multiplier;
207
+ literal_histograms =
208
+ BROTLI_ALLOC(m, HistogramLiteral, literal_histograms_size);
209
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literal_histograms)) return;
210
+ ClearHistogramsLiteral(literal_histograms, literal_histograms_size);
211
+
212
+ distance_histograms_size =
213
+ mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
214
+ distance_histograms =
215
+ BROTLI_ALLOC(m, HistogramDistance, distance_histograms_size);
216
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(distance_histograms)) return;
217
+ ClearHistogramsDistance(distance_histograms, distance_histograms_size);
218
+
219
+ BROTLI_DCHECK(mb->command_histograms == 0);
220
+ mb->command_histograms_size = mb->command_split.num_types;
221
+ mb->command_histograms =
222
+ BROTLI_ALLOC(m, HistogramCommand, mb->command_histograms_size);
223
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->command_histograms)) return;
224
+ ClearHistogramsCommand(mb->command_histograms, mb->command_histograms_size);
225
+
226
+ BrotliBuildHistogramsWithContext(cmds, num_commands,
227
+ &mb->literal_split, &mb->command_split, &mb->distance_split,
228
+ ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes,
229
+ literal_histograms, mb->command_histograms, distance_histograms);
230
+ BROTLI_FREE(m, literal_context_modes);
231
+
232
+ BROTLI_DCHECK(mb->literal_context_map == 0);
233
+ mb->literal_context_map_size =
234
+ mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
235
+ mb->literal_context_map =
236
+ BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
237
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->literal_context_map)) return;
238
+
239
+ BROTLI_DCHECK(mb->literal_histograms == 0);
240
+ mb->literal_histograms_size = mb->literal_context_map_size;
241
+ mb->literal_histograms =
242
+ BROTLI_ALLOC(m, HistogramLiteral, mb->literal_histograms_size);
243
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->literal_histograms)) return;
244
+
245
+ BrotliClusterHistogramsLiteral(m, literal_histograms, literal_histograms_size,
246
+ kMaxNumberOfHistograms, mb->literal_histograms,
247
+ &mb->literal_histograms_size, mb->literal_context_map);
248
+ if (BROTLI_IS_OOM(m)) return;
249
+ BROTLI_FREE(m, literal_histograms);
250
+
251
+ if (params->disable_literal_context_modeling) {
252
+ /* Distribute assignment to all contexts. */
253
+ for (i = mb->literal_split.num_types; i != 0;) {
254
+ size_t j = 0;
255
+ i--;
256
+ for (; j < (1 << BROTLI_LITERAL_CONTEXT_BITS); j++) {
257
+ mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
258
+ mb->literal_context_map[i];
259
+ }
260
+ }
261
+ }
262
+
263
+ BROTLI_DCHECK(mb->distance_context_map == 0);
264
+ mb->distance_context_map_size =
265
+ mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
266
+ mb->distance_context_map =
267
+ BROTLI_ALLOC(m, uint32_t, mb->distance_context_map_size);
268
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->distance_context_map)) return;
269
+
270
+ BROTLI_DCHECK(mb->distance_histograms == 0);
271
+ mb->distance_histograms_size = mb->distance_context_map_size;
272
+ mb->distance_histograms =
273
+ BROTLI_ALLOC(m, HistogramDistance, mb->distance_histograms_size);
274
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->distance_histograms)) return;
275
+
276
+ BrotliClusterHistogramsDistance(m, distance_histograms,
277
+ mb->distance_context_map_size,
278
+ kMaxNumberOfHistograms,
279
+ mb->distance_histograms,
280
+ &mb->distance_histograms_size,
281
+ mb->distance_context_map);
282
+ if (BROTLI_IS_OOM(m)) return;
283
+ BROTLI_FREE(m, distance_histograms);
284
+ }
285
+
286
+ #define FN(X) X ## Literal
287
+ #include "brotli_enc_metablock_inc.h" /* NOLINT(build/include) */
288
+ #undef FN
289
+
290
+ #define FN(X) X ## Command
291
+ #include "brotli_enc_metablock_inc.h" /* NOLINT(build/include) */
292
+ #undef FN
293
+
294
+ #define FN(X) X ## Distance
295
+ #include "brotli_enc_metablock_inc.h" /* NOLINT(build/include) */
296
+ #undef FN
297
+
298
+ #define BROTLI_MAX_STATIC_CONTEXTS 13
299
+
300
+ /* Greedy block splitter for one block category (literal, command or distance).
301
+ Gathers histograms for all context buckets. */
302
+ typedef struct ContextBlockSplitter {
303
+ /* Alphabet size of particular block category. */
304
+ size_t alphabet_size_;
305
+ size_t num_contexts_;
306
+ size_t max_block_types_;
307
+ /* We collect at least this many symbols for each block. */
308
+ size_t min_block_size_;
309
+ /* We merge histograms A and B if
310
+ entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
311
+ where A is the current histogram and B is the histogram of the last or the
312
+ second last block type. */
313
+ double split_threshold_;
314
+
315
+ size_t num_blocks_;
316
+ BlockSplit* split_; /* not owned */
317
+ HistogramLiteral* histograms_; /* not owned */
318
+ size_t* histograms_size_; /* not owned */
319
+
320
+ /* The number of symbols that we want to collect before deciding on whether
321
+ or not to merge the block with a previous one or emit a new block. */
322
+ size_t target_block_size_;
323
+ /* The number of symbols in the current histogram. */
324
+ size_t block_size_;
325
+ /* Offset of the current histogram. */
326
+ size_t curr_histogram_ix_;
327
+ /* Offset of the histograms of the previous two block types. */
328
+ size_t last_histogram_ix_[2];
329
+ /* Entropy of the previous two block types. */
330
+ double last_entropy_[2 * BROTLI_MAX_STATIC_CONTEXTS];
331
+ /* The number of times we merged the current block with the last one. */
332
+ size_t merge_last_count_;
333
+ } ContextBlockSplitter;
334
+
335
+ static void InitContextBlockSplitter(
336
+ MemoryManager* m, ContextBlockSplitter* self, size_t alphabet_size,
337
+ size_t num_contexts, size_t min_block_size, double split_threshold,
338
+ size_t num_symbols, BlockSplit* split, HistogramLiteral** histograms,
339
+ size_t* histograms_size) {
340
+ size_t max_num_blocks = num_symbols / min_block_size + 1;
341
+ size_t max_num_types;
342
+ BROTLI_DCHECK(num_contexts <= BROTLI_MAX_STATIC_CONTEXTS);
343
+
344
+ self->alphabet_size_ = alphabet_size;
345
+ self->num_contexts_ = num_contexts;
346
+ self->max_block_types_ = BROTLI_MAX_NUMBER_OF_BLOCK_TYPES / num_contexts;
347
+ self->min_block_size_ = min_block_size;
348
+ self->split_threshold_ = split_threshold;
349
+ self->num_blocks_ = 0;
350
+ self->split_ = split;
351
+ self->histograms_size_ = histograms_size;
352
+ self->target_block_size_ = min_block_size;
353
+ self->block_size_ = 0;
354
+ self->curr_histogram_ix_ = 0;
355
+ self->merge_last_count_ = 0;
356
+
357
+ /* We have to allocate one more histogram than the maximum number of block
358
+ types for the current histogram when the meta-block is too big. */
359
+ max_num_types =
360
+ BROTLI_MIN(size_t, max_num_blocks, self->max_block_types_ + 1);
361
+ BROTLI_ENSURE_CAPACITY(m, uint8_t,
362
+ split->types, split->types_alloc_size, max_num_blocks);
363
+ BROTLI_ENSURE_CAPACITY(m, uint32_t,
364
+ split->lengths, split->lengths_alloc_size, max_num_blocks);
365
+ if (BROTLI_IS_OOM(m)) return;
366
+ split->num_blocks = max_num_blocks;
367
+ if (BROTLI_IS_OOM(m)) return;
368
+ BROTLI_DCHECK(*histograms == 0);
369
+ *histograms_size = max_num_types * num_contexts;
370
+ *histograms = BROTLI_ALLOC(m, HistogramLiteral, *histograms_size);
371
+ self->histograms_ = *histograms;
372
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(*histograms)) return;
373
+ /* Clear only current histogram. */
374
+ ClearHistogramsLiteral(&self->histograms_[0], num_contexts);
375
+ self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
376
+ }
377
+
378
+ /* Does either of three things:
379
+ (1) emits the current block with a new block type;
380
+ (2) emits the current block with the type of the second last block;
381
+ (3) merges the current block with the last block. */
382
+ static void ContextBlockSplitterFinishBlock(
383
+ ContextBlockSplitter* self, MemoryManager* m, BROTLI_BOOL is_final) {
384
+ BlockSplit* split = self->split_;
385
+ const size_t num_contexts = self->num_contexts_;
386
+ double* last_entropy = self->last_entropy_;
387
+ HistogramLiteral* histograms = self->histograms_;
388
+
389
+ if (self->block_size_ < self->min_block_size_) {
390
+ self->block_size_ = self->min_block_size_;
391
+ }
392
+ if (self->num_blocks_ == 0) {
393
+ size_t i;
394
+ /* Create first block. */
395
+ split->lengths[0] = (uint32_t)self->block_size_;
396
+ split->types[0] = 0;
397
+
398
+ for (i = 0; i < num_contexts; ++i) {
399
+ last_entropy[i] =
400
+ BitsEntropy(histograms[i].data_, self->alphabet_size_);
401
+ last_entropy[num_contexts + i] = last_entropy[i];
402
+ }
403
+ ++self->num_blocks_;
404
+ ++split->num_types;
405
+ self->curr_histogram_ix_ += num_contexts;
406
+ if (self->curr_histogram_ix_ < *self->histograms_size_) {
407
+ ClearHistogramsLiteral(
408
+ &self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
409
+ }
410
+ self->block_size_ = 0;
411
+ } else if (self->block_size_ > 0) {
412
+ /* Try merging the set of histograms for the current block type with the
413
+ respective set of histograms for the last and second last block types.
414
+ Decide over the split based on the total reduction of entropy across
415
+ all contexts. */
416
+ double entropy[BROTLI_MAX_STATIC_CONTEXTS];
417
+ HistogramLiteral* combined_histo =
418
+ BROTLI_ALLOC(m, HistogramLiteral, 2 * num_contexts);
419
+ double combined_entropy[2 * BROTLI_MAX_STATIC_CONTEXTS];
420
+ double diff[2] = { 0.0 };
421
+ size_t i;
422
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(combined_histo)) return;
423
+ for (i = 0; i < num_contexts; ++i) {
424
+ size_t curr_histo_ix = self->curr_histogram_ix_ + i;
425
+ size_t j;
426
+ entropy[i] = BitsEntropy(histograms[curr_histo_ix].data_,
427
+ self->alphabet_size_);
428
+ for (j = 0; j < 2; ++j) {
429
+ size_t jx = j * num_contexts + i;
430
+ size_t last_histogram_ix = self->last_histogram_ix_[j] + i;
431
+ combined_histo[jx] = histograms[curr_histo_ix];
432
+ HistogramAddHistogramLiteral(&combined_histo[jx],
433
+ &histograms[last_histogram_ix]);
434
+ combined_entropy[jx] = BitsEntropy(
435
+ &combined_histo[jx].data_[0], self->alphabet_size_);
436
+ diff[j] += combined_entropy[jx] - entropy[i] - last_entropy[jx];
437
+ }
438
+ }
439
+
440
+ if (split->num_types < self->max_block_types_ &&
441
+ diff[0] > self->split_threshold_ &&
442
+ diff[1] > self->split_threshold_) {
443
+ /* Create new block. */
444
+ split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
445
+ split->types[self->num_blocks_] = (uint8_t)split->num_types;
446
+ self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
447
+ self->last_histogram_ix_[0] = split->num_types * num_contexts;
448
+ for (i = 0; i < num_contexts; ++i) {
449
+ last_entropy[num_contexts + i] = last_entropy[i];
450
+ last_entropy[i] = entropy[i];
451
+ }
452
+ ++self->num_blocks_;
453
+ ++split->num_types;
454
+ self->curr_histogram_ix_ += num_contexts;
455
+ if (self->curr_histogram_ix_ < *self->histograms_size_) {
456
+ ClearHistogramsLiteral(
457
+ &self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
458
+ }
459
+ self->block_size_ = 0;
460
+ self->merge_last_count_ = 0;
461
+ self->target_block_size_ = self->min_block_size_;
462
+ } else if (diff[1] < diff[0] - 20.0) {
463
+ /* Combine this block with second last block. */
464
+ split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
465
+ split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
466
+ BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
467
+ for (i = 0; i < num_contexts; ++i) {
468
+ histograms[self->last_histogram_ix_[0] + i] =
469
+ combined_histo[num_contexts + i];
470
+ last_entropy[num_contexts + i] = last_entropy[i];
471
+ last_entropy[i] = combined_entropy[num_contexts + i];
472
+ HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
473
+ }
474
+ ++self->num_blocks_;
475
+ self->block_size_ = 0;
476
+ self->merge_last_count_ = 0;
477
+ self->target_block_size_ = self->min_block_size_;
478
+ } else {
479
+ /* Combine this block with last block. */
480
+ split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
481
+ for (i = 0; i < num_contexts; ++i) {
482
+ histograms[self->last_histogram_ix_[0] + i] = combined_histo[i];
483
+ last_entropy[i] = combined_entropy[i];
484
+ if (split->num_types == 1) {
485
+ last_entropy[num_contexts + i] = last_entropy[i];
486
+ }
487
+ HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
488
+ }
489
+ self->block_size_ = 0;
490
+ if (++self->merge_last_count_ > 1) {
491
+ self->target_block_size_ += self->min_block_size_;
492
+ }
493
+ }
494
+ BROTLI_FREE(m, combined_histo);
495
+ }
496
+ if (is_final) {
497
+ *self->histograms_size_ = split->num_types * num_contexts;
498
+ split->num_blocks = self->num_blocks_;
499
+ }
500
+ }
501
+
502
+ /* Adds the next symbol to the current block type and context. When the
503
+ current block reaches the target size, decides on merging the block. */
504
+ static void ContextBlockSplitterAddSymbol(
505
+ ContextBlockSplitter* self, MemoryManager* m,
506
+ size_t symbol, size_t context) {
507
+ HistogramAddLiteral(&self->histograms_[self->curr_histogram_ix_ + context],
508
+ symbol);
509
+ ++self->block_size_;
510
+ if (self->block_size_ == self->target_block_size_) {
511
+ ContextBlockSplitterFinishBlock(self, m, /* is_final = */ BROTLI_FALSE);
512
+ if (BROTLI_IS_OOM(m)) return;
513
+ }
514
+ }
515
+
516
+ static void MapStaticContexts(MemoryManager* m,
517
+ size_t num_contexts,
518
+ const uint32_t* static_context_map,
519
+ MetaBlockSplit* mb) {
520
+ size_t i;
521
+ BROTLI_DCHECK(mb->literal_context_map == 0);
522
+ mb->literal_context_map_size =
523
+ mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
524
+ mb->literal_context_map =
525
+ BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
526
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->literal_context_map)) return;
527
+
528
+ for (i = 0; i < mb->literal_split.num_types; ++i) {
529
+ uint32_t offset = (uint32_t)(i * num_contexts);
530
+ size_t j;
531
+ for (j = 0; j < (1u << BROTLI_LITERAL_CONTEXT_BITS); ++j) {
532
+ mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
533
+ offset + static_context_map[j];
534
+ }
535
+ }
536
+ }
537
+
538
+ static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal(
539
+ MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
540
+ uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
541
+ const size_t num_contexts, const uint32_t* static_context_map,
542
+ const Command* commands, size_t n_commands, MetaBlockSplit* mb) {
543
+ union {
544
+ BlockSplitterLiteral plain;
545
+ ContextBlockSplitter ctx;
546
+ } lit_blocks;
547
+ BlockSplitterCommand cmd_blocks;
548
+ BlockSplitterDistance dist_blocks;
549
+ size_t num_literals = 0;
550
+ size_t i;
551
+ for (i = 0; i < n_commands; ++i) {
552
+ num_literals += commands[i].insert_len_;
553
+ }
554
+
555
+ if (num_contexts == 1) {
556
+ InitBlockSplitterLiteral(m, &lit_blocks.plain, 256, 512, 400.0,
557
+ num_literals, &mb->literal_split, &mb->literal_histograms,
558
+ &mb->literal_histograms_size);
559
+ } else {
560
+ InitContextBlockSplitter(m, &lit_blocks.ctx, 256, num_contexts, 512, 400.0,
561
+ num_literals, &mb->literal_split, &mb->literal_histograms,
562
+ &mb->literal_histograms_size);
563
+ }
564
+ if (BROTLI_IS_OOM(m)) return;
565
+ InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024,
566
+ 500.0, n_commands, &mb->command_split, &mb->command_histograms,
567
+ &mb->command_histograms_size);
568
+ if (BROTLI_IS_OOM(m)) return;
569
+ InitBlockSplitterDistance(m, &dist_blocks, 64, 512, 100.0, n_commands,
570
+ &mb->distance_split, &mb->distance_histograms,
571
+ &mb->distance_histograms_size);
572
+ if (BROTLI_IS_OOM(m)) return;
573
+
574
+ for (i = 0; i < n_commands; ++i) {
575
+ const Command cmd = commands[i];
576
+ size_t j;
577
+ BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_);
578
+ for (j = cmd.insert_len_; j != 0; --j) {
579
+ uint8_t literal = ringbuffer[pos & mask];
580
+ if (num_contexts == 1) {
581
+ BlockSplitterAddSymbolLiteral(&lit_blocks.plain, literal);
582
+ } else {
583
+ size_t context =
584
+ BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut);
585
+ ContextBlockSplitterAddSymbol(&lit_blocks.ctx, m, literal,
586
+ static_context_map[context]);
587
+ if (BROTLI_IS_OOM(m)) return;
588
+ }
589
+ prev_byte2 = prev_byte;
590
+ prev_byte = literal;
591
+ ++pos;
592
+ }
593
+ pos += CommandCopyLen(&cmd);
594
+ if (CommandCopyLen(&cmd)) {
595
+ prev_byte2 = ringbuffer[(pos - 2) & mask];
596
+ prev_byte = ringbuffer[(pos - 1) & mask];
597
+ if (cmd.cmd_prefix_ >= 128) {
598
+ BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_ & 0x3FF);
599
+ }
600
+ }
601
+ }
602
+
603
+ if (num_contexts == 1) {
604
+ BlockSplitterFinishBlockLiteral(
605
+ &lit_blocks.plain, /* is_final = */ BROTLI_TRUE);
606
+ } else {
607
+ ContextBlockSplitterFinishBlock(
608
+ &lit_blocks.ctx, m, /* is_final = */ BROTLI_TRUE);
609
+ if (BROTLI_IS_OOM(m)) return;
610
+ }
611
+ BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ BROTLI_TRUE);
612
+ BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ BROTLI_TRUE);
613
+
614
+ if (num_contexts > 1) {
615
+ MapStaticContexts(m, num_contexts, static_context_map, mb);
616
+ }
617
+ }
618
+
619
+ void BrotliBuildMetaBlockGreedy(MemoryManager* m,
620
+ const uint8_t* ringbuffer,
621
+ size_t pos,
622
+ size_t mask,
623
+ uint8_t prev_byte,
624
+ uint8_t prev_byte2,
625
+ ContextLut literal_context_lut,
626
+ size_t num_contexts,
627
+ const uint32_t* static_context_map,
628
+ const Command* commands,
629
+ size_t n_commands,
630
+ MetaBlockSplit* mb) {
631
+ if (num_contexts == 1) {
632
+ BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
633
+ prev_byte2, literal_context_lut, 1, NULL, commands, n_commands, mb);
634
+ } else {
635
+ BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
636
+ prev_byte2, literal_context_lut, num_contexts, static_context_map,
637
+ commands, n_commands, mb);
638
+ }
639
+ }
640
+
641
+ void BrotliOptimizeHistograms(uint32_t num_distance_codes,
642
+ MetaBlockSplit* mb) {
643
+ uint8_t good_for_rle[BROTLI_NUM_COMMAND_SYMBOLS];
644
+ size_t i;
645
+ for (i = 0; i < mb->literal_histograms_size; ++i) {
646
+ BrotliOptimizeHuffmanCountsForRle(256, mb->literal_histograms[i].data_,
647
+ good_for_rle);
648
+ }
649
+ for (i = 0; i < mb->command_histograms_size; ++i) {
650
+ BrotliOptimizeHuffmanCountsForRle(BROTLI_NUM_COMMAND_SYMBOLS,
651
+ mb->command_histograms[i].data_,
652
+ good_for_rle);
653
+ }
654
+ for (i = 0; i < mb->distance_histograms_size; ++i) {
655
+ BrotliOptimizeHuffmanCountsForRle(num_distance_codes,
656
+ mb->distance_histograms[i].data_,
657
+ good_for_rle);
658
+ }
659
+ }
660
+
661
+ #if defined(__cplusplus) || defined(c_plusplus)
662
+ } /* extern "C" */
663
+ #endif