isomorfeus-ferret 0.12.6 → 0.12.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -4
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
- data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
- data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
- data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
- data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
- data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
- data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
- data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
- data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +88 -3
@@ -0,0 +1,663 @@
|
|
1
|
+
/* Copyright 2015 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Algorithms for distributing the literals and commands of a metablock between
|
8
|
+
block types and contexts. */
|
9
|
+
|
10
|
+
#include "brotli_enc_metablock.h"
|
11
|
+
|
12
|
+
#include "brotli_common_constants.h"
|
13
|
+
#include "brotli_common_context.h"
|
14
|
+
#include "brotli_common_platform.h"
|
15
|
+
#include "brotli_types.h"
|
16
|
+
#include "brotli_enc_bit_cost.h"
|
17
|
+
#include "brotli_enc_block_splitter.h"
|
18
|
+
#include "brotli_enc_cluster.h"
|
19
|
+
#include "brotli_enc_entropy_encode.h"
|
20
|
+
#include "brotli_enc_histogram.h"
|
21
|
+
#include "brotli_enc_memory.h"
|
22
|
+
#include "brotli_enc_quality.h"
|
23
|
+
|
24
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
25
|
+
extern "C" {
|
26
|
+
#endif
|
27
|
+
|
28
|
+
void BrotliInitDistanceParams(BrotliEncoderParams* params,
|
29
|
+
uint32_t npostfix, uint32_t ndirect) {
|
30
|
+
BrotliDistanceParams* dist_params = ¶ms->dist;
|
31
|
+
uint32_t alphabet_size_max;
|
32
|
+
uint32_t alphabet_size_limit;
|
33
|
+
uint32_t max_distance;
|
34
|
+
|
35
|
+
dist_params->distance_postfix_bits = npostfix;
|
36
|
+
dist_params->num_direct_distance_codes = ndirect;
|
37
|
+
|
38
|
+
alphabet_size_max = BROTLI_DISTANCE_ALPHABET_SIZE(
|
39
|
+
npostfix, ndirect, BROTLI_MAX_DISTANCE_BITS);
|
40
|
+
alphabet_size_limit = alphabet_size_max;
|
41
|
+
max_distance = ndirect + (1U << (BROTLI_MAX_DISTANCE_BITS + npostfix + 2)) -
|
42
|
+
(1U << (npostfix + 2));
|
43
|
+
|
44
|
+
if (params->large_window) {
|
45
|
+
BrotliDistanceCodeLimit limit = BrotliCalculateDistanceCodeLimit(
|
46
|
+
BROTLI_MAX_ALLOWED_DISTANCE, npostfix, ndirect);
|
47
|
+
alphabet_size_max = BROTLI_DISTANCE_ALPHABET_SIZE(
|
48
|
+
npostfix, ndirect, BROTLI_LARGE_MAX_DISTANCE_BITS);
|
49
|
+
alphabet_size_limit = limit.max_alphabet_size;
|
50
|
+
max_distance = limit.max_distance;
|
51
|
+
}
|
52
|
+
|
53
|
+
dist_params->alphabet_size_max = alphabet_size_max;
|
54
|
+
dist_params->alphabet_size_limit = alphabet_size_limit;
|
55
|
+
dist_params->max_distance = max_distance;
|
56
|
+
}
|
57
|
+
|
58
|
+
static void RecomputeDistancePrefixes(Command* cmds,
|
59
|
+
size_t num_commands,
|
60
|
+
const BrotliDistanceParams* orig_params,
|
61
|
+
const BrotliDistanceParams* new_params) {
|
62
|
+
size_t i;
|
63
|
+
|
64
|
+
if (orig_params->distance_postfix_bits == new_params->distance_postfix_bits &&
|
65
|
+
orig_params->num_direct_distance_codes ==
|
66
|
+
new_params->num_direct_distance_codes) {
|
67
|
+
return;
|
68
|
+
}
|
69
|
+
|
70
|
+
for (i = 0; i < num_commands; ++i) {
|
71
|
+
Command* cmd = &cmds[i];
|
72
|
+
if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
|
73
|
+
PrefixEncodeCopyDistance(CommandRestoreDistanceCode(cmd, orig_params),
|
74
|
+
new_params->num_direct_distance_codes,
|
75
|
+
new_params->distance_postfix_bits,
|
76
|
+
&cmd->dist_prefix_,
|
77
|
+
&cmd->dist_extra_);
|
78
|
+
}
|
79
|
+
}
|
80
|
+
}
|
81
|
+
|
82
|
+
static BROTLI_BOOL ComputeDistanceCost(const Command* cmds,
|
83
|
+
size_t num_commands,
|
84
|
+
const BrotliDistanceParams* orig_params,
|
85
|
+
const BrotliDistanceParams* new_params,
|
86
|
+
double* cost) {
|
87
|
+
size_t i;
|
88
|
+
BROTLI_BOOL equal_params = BROTLI_FALSE;
|
89
|
+
uint16_t dist_prefix;
|
90
|
+
uint32_t dist_extra;
|
91
|
+
double extra_bits = 0.0;
|
92
|
+
HistogramDistance histo;
|
93
|
+
HistogramClearDistance(&histo);
|
94
|
+
|
95
|
+
if (orig_params->distance_postfix_bits == new_params->distance_postfix_bits &&
|
96
|
+
orig_params->num_direct_distance_codes ==
|
97
|
+
new_params->num_direct_distance_codes) {
|
98
|
+
equal_params = BROTLI_TRUE;
|
99
|
+
}
|
100
|
+
|
101
|
+
for (i = 0; i < num_commands; i++) {
|
102
|
+
const Command* cmd = &cmds[i];
|
103
|
+
if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
|
104
|
+
if (equal_params) {
|
105
|
+
dist_prefix = cmd->dist_prefix_;
|
106
|
+
} else {
|
107
|
+
uint32_t distance = CommandRestoreDistanceCode(cmd, orig_params);
|
108
|
+
if (distance > new_params->max_distance) {
|
109
|
+
return BROTLI_FALSE;
|
110
|
+
}
|
111
|
+
PrefixEncodeCopyDistance(distance,
|
112
|
+
new_params->num_direct_distance_codes,
|
113
|
+
new_params->distance_postfix_bits,
|
114
|
+
&dist_prefix,
|
115
|
+
&dist_extra);
|
116
|
+
}
|
117
|
+
HistogramAddDistance(&histo, dist_prefix & 0x3FF);
|
118
|
+
extra_bits += dist_prefix >> 10;
|
119
|
+
}
|
120
|
+
}
|
121
|
+
|
122
|
+
*cost = BrotliPopulationCostDistance(&histo) + extra_bits;
|
123
|
+
return BROTLI_TRUE;
|
124
|
+
}
|
125
|
+
|
126
|
+
void BrotliBuildMetaBlock(MemoryManager* m,
|
127
|
+
const uint8_t* ringbuffer,
|
128
|
+
const size_t pos,
|
129
|
+
const size_t mask,
|
130
|
+
BrotliEncoderParams* params,
|
131
|
+
uint8_t prev_byte,
|
132
|
+
uint8_t prev_byte2,
|
133
|
+
Command* cmds,
|
134
|
+
size_t num_commands,
|
135
|
+
ContextType literal_context_mode,
|
136
|
+
MetaBlockSplit* mb) {
|
137
|
+
/* Histogram ids need to fit in one byte. */
|
138
|
+
static const size_t kMaxNumberOfHistograms = 256;
|
139
|
+
HistogramDistance* distance_histograms;
|
140
|
+
HistogramLiteral* literal_histograms;
|
141
|
+
ContextType* literal_context_modes = NULL;
|
142
|
+
size_t literal_histograms_size;
|
143
|
+
size_t distance_histograms_size;
|
144
|
+
size_t i;
|
145
|
+
size_t literal_context_multiplier = 1;
|
146
|
+
uint32_t npostfix;
|
147
|
+
uint32_t ndirect_msb = 0;
|
148
|
+
BROTLI_BOOL check_orig = BROTLI_TRUE;
|
149
|
+
double best_dist_cost = 1e99;
|
150
|
+
BrotliEncoderParams orig_params = *params;
|
151
|
+
BrotliEncoderParams new_params = *params;
|
152
|
+
|
153
|
+
for (npostfix = 0; npostfix <= BROTLI_MAX_NPOSTFIX; npostfix++) {
|
154
|
+
for (; ndirect_msb < 16; ndirect_msb++) {
|
155
|
+
uint32_t ndirect = ndirect_msb << npostfix;
|
156
|
+
BROTLI_BOOL skip;
|
157
|
+
double dist_cost;
|
158
|
+
BrotliInitDistanceParams(&new_params, npostfix, ndirect);
|
159
|
+
if (npostfix == orig_params.dist.distance_postfix_bits &&
|
160
|
+
ndirect == orig_params.dist.num_direct_distance_codes) {
|
161
|
+
check_orig = BROTLI_FALSE;
|
162
|
+
}
|
163
|
+
skip = !ComputeDistanceCost(
|
164
|
+
cmds, num_commands,
|
165
|
+
&orig_params.dist, &new_params.dist, &dist_cost);
|
166
|
+
if (skip || (dist_cost > best_dist_cost)) {
|
167
|
+
break;
|
168
|
+
}
|
169
|
+
best_dist_cost = dist_cost;
|
170
|
+
params->dist = new_params.dist;
|
171
|
+
}
|
172
|
+
if (ndirect_msb > 0) ndirect_msb--;
|
173
|
+
ndirect_msb /= 2;
|
174
|
+
}
|
175
|
+
if (check_orig) {
|
176
|
+
double dist_cost;
|
177
|
+
ComputeDistanceCost(cmds, num_commands,
|
178
|
+
&orig_params.dist, &orig_params.dist, &dist_cost);
|
179
|
+
if (dist_cost < best_dist_cost) {
|
180
|
+
/* NB: currently unused; uncomment when more param tuning is added. */
|
181
|
+
/* best_dist_cost = dist_cost; */
|
182
|
+
params->dist = orig_params.dist;
|
183
|
+
}
|
184
|
+
}
|
185
|
+
RecomputeDistancePrefixes(cmds, num_commands,
|
186
|
+
&orig_params.dist, ¶ms->dist);
|
187
|
+
|
188
|
+
BrotliSplitBlock(m, cmds, num_commands,
|
189
|
+
ringbuffer, pos, mask, params,
|
190
|
+
&mb->literal_split,
|
191
|
+
&mb->command_split,
|
192
|
+
&mb->distance_split);
|
193
|
+
if (BROTLI_IS_OOM(m)) return;
|
194
|
+
|
195
|
+
if (!params->disable_literal_context_modeling) {
|
196
|
+
literal_context_multiplier = 1 << BROTLI_LITERAL_CONTEXT_BITS;
|
197
|
+
literal_context_modes =
|
198
|
+
BROTLI_ALLOC(m, ContextType, mb->literal_split.num_types);
|
199
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literal_context_modes)) return;
|
200
|
+
for (i = 0; i < mb->literal_split.num_types; ++i) {
|
201
|
+
literal_context_modes[i] = literal_context_mode;
|
202
|
+
}
|
203
|
+
}
|
204
|
+
|
205
|
+
literal_histograms_size =
|
206
|
+
mb->literal_split.num_types * literal_context_multiplier;
|
207
|
+
literal_histograms =
|
208
|
+
BROTLI_ALLOC(m, HistogramLiteral, literal_histograms_size);
|
209
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literal_histograms)) return;
|
210
|
+
ClearHistogramsLiteral(literal_histograms, literal_histograms_size);
|
211
|
+
|
212
|
+
distance_histograms_size =
|
213
|
+
mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
|
214
|
+
distance_histograms =
|
215
|
+
BROTLI_ALLOC(m, HistogramDistance, distance_histograms_size);
|
216
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(distance_histograms)) return;
|
217
|
+
ClearHistogramsDistance(distance_histograms, distance_histograms_size);
|
218
|
+
|
219
|
+
BROTLI_DCHECK(mb->command_histograms == 0);
|
220
|
+
mb->command_histograms_size = mb->command_split.num_types;
|
221
|
+
mb->command_histograms =
|
222
|
+
BROTLI_ALLOC(m, HistogramCommand, mb->command_histograms_size);
|
223
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->command_histograms)) return;
|
224
|
+
ClearHistogramsCommand(mb->command_histograms, mb->command_histograms_size);
|
225
|
+
|
226
|
+
BrotliBuildHistogramsWithContext(cmds, num_commands,
|
227
|
+
&mb->literal_split, &mb->command_split, &mb->distance_split,
|
228
|
+
ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes,
|
229
|
+
literal_histograms, mb->command_histograms, distance_histograms);
|
230
|
+
BROTLI_FREE(m, literal_context_modes);
|
231
|
+
|
232
|
+
BROTLI_DCHECK(mb->literal_context_map == 0);
|
233
|
+
mb->literal_context_map_size =
|
234
|
+
mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
|
235
|
+
mb->literal_context_map =
|
236
|
+
BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
|
237
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->literal_context_map)) return;
|
238
|
+
|
239
|
+
BROTLI_DCHECK(mb->literal_histograms == 0);
|
240
|
+
mb->literal_histograms_size = mb->literal_context_map_size;
|
241
|
+
mb->literal_histograms =
|
242
|
+
BROTLI_ALLOC(m, HistogramLiteral, mb->literal_histograms_size);
|
243
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->literal_histograms)) return;
|
244
|
+
|
245
|
+
BrotliClusterHistogramsLiteral(m, literal_histograms, literal_histograms_size,
|
246
|
+
kMaxNumberOfHistograms, mb->literal_histograms,
|
247
|
+
&mb->literal_histograms_size, mb->literal_context_map);
|
248
|
+
if (BROTLI_IS_OOM(m)) return;
|
249
|
+
BROTLI_FREE(m, literal_histograms);
|
250
|
+
|
251
|
+
if (params->disable_literal_context_modeling) {
|
252
|
+
/* Distribute assignment to all contexts. */
|
253
|
+
for (i = mb->literal_split.num_types; i != 0;) {
|
254
|
+
size_t j = 0;
|
255
|
+
i--;
|
256
|
+
for (; j < (1 << BROTLI_LITERAL_CONTEXT_BITS); j++) {
|
257
|
+
mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
|
258
|
+
mb->literal_context_map[i];
|
259
|
+
}
|
260
|
+
}
|
261
|
+
}
|
262
|
+
|
263
|
+
BROTLI_DCHECK(mb->distance_context_map == 0);
|
264
|
+
mb->distance_context_map_size =
|
265
|
+
mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
|
266
|
+
mb->distance_context_map =
|
267
|
+
BROTLI_ALLOC(m, uint32_t, mb->distance_context_map_size);
|
268
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->distance_context_map)) return;
|
269
|
+
|
270
|
+
BROTLI_DCHECK(mb->distance_histograms == 0);
|
271
|
+
mb->distance_histograms_size = mb->distance_context_map_size;
|
272
|
+
mb->distance_histograms =
|
273
|
+
BROTLI_ALLOC(m, HistogramDistance, mb->distance_histograms_size);
|
274
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->distance_histograms)) return;
|
275
|
+
|
276
|
+
BrotliClusterHistogramsDistance(m, distance_histograms,
|
277
|
+
mb->distance_context_map_size,
|
278
|
+
kMaxNumberOfHistograms,
|
279
|
+
mb->distance_histograms,
|
280
|
+
&mb->distance_histograms_size,
|
281
|
+
mb->distance_context_map);
|
282
|
+
if (BROTLI_IS_OOM(m)) return;
|
283
|
+
BROTLI_FREE(m, distance_histograms);
|
284
|
+
}
|
285
|
+
|
286
|
+
#define FN(X) X ## Literal
|
287
|
+
#include "brotli_enc_metablock_inc.h" /* NOLINT(build/include) */
|
288
|
+
#undef FN
|
289
|
+
|
290
|
+
#define FN(X) X ## Command
|
291
|
+
#include "brotli_enc_metablock_inc.h" /* NOLINT(build/include) */
|
292
|
+
#undef FN
|
293
|
+
|
294
|
+
#define FN(X) X ## Distance
|
295
|
+
#include "brotli_enc_metablock_inc.h" /* NOLINT(build/include) */
|
296
|
+
#undef FN
|
297
|
+
|
298
|
+
#define BROTLI_MAX_STATIC_CONTEXTS 13
|
299
|
+
|
300
|
+
/* Greedy block splitter for one block category (literal, command or distance).
|
301
|
+
Gathers histograms for all context buckets. */
|
302
|
+
typedef struct ContextBlockSplitter {
|
303
|
+
/* Alphabet size of particular block category. */
|
304
|
+
size_t alphabet_size_;
|
305
|
+
size_t num_contexts_;
|
306
|
+
size_t max_block_types_;
|
307
|
+
/* We collect at least this many symbols for each block. */
|
308
|
+
size_t min_block_size_;
|
309
|
+
/* We merge histograms A and B if
|
310
|
+
entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
|
311
|
+
where A is the current histogram and B is the histogram of the last or the
|
312
|
+
second last block type. */
|
313
|
+
double split_threshold_;
|
314
|
+
|
315
|
+
size_t num_blocks_;
|
316
|
+
BlockSplit* split_; /* not owned */
|
317
|
+
HistogramLiteral* histograms_; /* not owned */
|
318
|
+
size_t* histograms_size_; /* not owned */
|
319
|
+
|
320
|
+
/* The number of symbols that we want to collect before deciding on whether
|
321
|
+
or not to merge the block with a previous one or emit a new block. */
|
322
|
+
size_t target_block_size_;
|
323
|
+
/* The number of symbols in the current histogram. */
|
324
|
+
size_t block_size_;
|
325
|
+
/* Offset of the current histogram. */
|
326
|
+
size_t curr_histogram_ix_;
|
327
|
+
/* Offset of the histograms of the previous two block types. */
|
328
|
+
size_t last_histogram_ix_[2];
|
329
|
+
/* Entropy of the previous two block types. */
|
330
|
+
double last_entropy_[2 * BROTLI_MAX_STATIC_CONTEXTS];
|
331
|
+
/* The number of times we merged the current block with the last one. */
|
332
|
+
size_t merge_last_count_;
|
333
|
+
} ContextBlockSplitter;
|
334
|
+
|
335
|
+
static void InitContextBlockSplitter(
|
336
|
+
MemoryManager* m, ContextBlockSplitter* self, size_t alphabet_size,
|
337
|
+
size_t num_contexts, size_t min_block_size, double split_threshold,
|
338
|
+
size_t num_symbols, BlockSplit* split, HistogramLiteral** histograms,
|
339
|
+
size_t* histograms_size) {
|
340
|
+
size_t max_num_blocks = num_symbols / min_block_size + 1;
|
341
|
+
size_t max_num_types;
|
342
|
+
BROTLI_DCHECK(num_contexts <= BROTLI_MAX_STATIC_CONTEXTS);
|
343
|
+
|
344
|
+
self->alphabet_size_ = alphabet_size;
|
345
|
+
self->num_contexts_ = num_contexts;
|
346
|
+
self->max_block_types_ = BROTLI_MAX_NUMBER_OF_BLOCK_TYPES / num_contexts;
|
347
|
+
self->min_block_size_ = min_block_size;
|
348
|
+
self->split_threshold_ = split_threshold;
|
349
|
+
self->num_blocks_ = 0;
|
350
|
+
self->split_ = split;
|
351
|
+
self->histograms_size_ = histograms_size;
|
352
|
+
self->target_block_size_ = min_block_size;
|
353
|
+
self->block_size_ = 0;
|
354
|
+
self->curr_histogram_ix_ = 0;
|
355
|
+
self->merge_last_count_ = 0;
|
356
|
+
|
357
|
+
/* We have to allocate one more histogram than the maximum number of block
|
358
|
+
types for the current histogram when the meta-block is too big. */
|
359
|
+
max_num_types =
|
360
|
+
BROTLI_MIN(size_t, max_num_blocks, self->max_block_types_ + 1);
|
361
|
+
BROTLI_ENSURE_CAPACITY(m, uint8_t,
|
362
|
+
split->types, split->types_alloc_size, max_num_blocks);
|
363
|
+
BROTLI_ENSURE_CAPACITY(m, uint32_t,
|
364
|
+
split->lengths, split->lengths_alloc_size, max_num_blocks);
|
365
|
+
if (BROTLI_IS_OOM(m)) return;
|
366
|
+
split->num_blocks = max_num_blocks;
|
367
|
+
if (BROTLI_IS_OOM(m)) return;
|
368
|
+
BROTLI_DCHECK(*histograms == 0);
|
369
|
+
*histograms_size = max_num_types * num_contexts;
|
370
|
+
*histograms = BROTLI_ALLOC(m, HistogramLiteral, *histograms_size);
|
371
|
+
self->histograms_ = *histograms;
|
372
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(*histograms)) return;
|
373
|
+
/* Clear only current histogram. */
|
374
|
+
ClearHistogramsLiteral(&self->histograms_[0], num_contexts);
|
375
|
+
self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
|
376
|
+
}
|
377
|
+
|
378
|
+
/* Does either of three things:
|
379
|
+
(1) emits the current block with a new block type;
|
380
|
+
(2) emits the current block with the type of the second last block;
|
381
|
+
(3) merges the current block with the last block. */
|
382
|
+
static void ContextBlockSplitterFinishBlock(
|
383
|
+
ContextBlockSplitter* self, MemoryManager* m, BROTLI_BOOL is_final) {
|
384
|
+
BlockSplit* split = self->split_;
|
385
|
+
const size_t num_contexts = self->num_contexts_;
|
386
|
+
double* last_entropy = self->last_entropy_;
|
387
|
+
HistogramLiteral* histograms = self->histograms_;
|
388
|
+
|
389
|
+
if (self->block_size_ < self->min_block_size_) {
|
390
|
+
self->block_size_ = self->min_block_size_;
|
391
|
+
}
|
392
|
+
if (self->num_blocks_ == 0) {
|
393
|
+
size_t i;
|
394
|
+
/* Create first block. */
|
395
|
+
split->lengths[0] = (uint32_t)self->block_size_;
|
396
|
+
split->types[0] = 0;
|
397
|
+
|
398
|
+
for (i = 0; i < num_contexts; ++i) {
|
399
|
+
last_entropy[i] =
|
400
|
+
BitsEntropy(histograms[i].data_, self->alphabet_size_);
|
401
|
+
last_entropy[num_contexts + i] = last_entropy[i];
|
402
|
+
}
|
403
|
+
++self->num_blocks_;
|
404
|
+
++split->num_types;
|
405
|
+
self->curr_histogram_ix_ += num_contexts;
|
406
|
+
if (self->curr_histogram_ix_ < *self->histograms_size_) {
|
407
|
+
ClearHistogramsLiteral(
|
408
|
+
&self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
|
409
|
+
}
|
410
|
+
self->block_size_ = 0;
|
411
|
+
} else if (self->block_size_ > 0) {
|
412
|
+
/* Try merging the set of histograms for the current block type with the
|
413
|
+
respective set of histograms for the last and second last block types.
|
414
|
+
Decide over the split based on the total reduction of entropy across
|
415
|
+
all contexts. */
|
416
|
+
double entropy[BROTLI_MAX_STATIC_CONTEXTS];
|
417
|
+
HistogramLiteral* combined_histo =
|
418
|
+
BROTLI_ALLOC(m, HistogramLiteral, 2 * num_contexts);
|
419
|
+
double combined_entropy[2 * BROTLI_MAX_STATIC_CONTEXTS];
|
420
|
+
double diff[2] = { 0.0 };
|
421
|
+
size_t i;
|
422
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(combined_histo)) return;
|
423
|
+
for (i = 0; i < num_contexts; ++i) {
|
424
|
+
size_t curr_histo_ix = self->curr_histogram_ix_ + i;
|
425
|
+
size_t j;
|
426
|
+
entropy[i] = BitsEntropy(histograms[curr_histo_ix].data_,
|
427
|
+
self->alphabet_size_);
|
428
|
+
for (j = 0; j < 2; ++j) {
|
429
|
+
size_t jx = j * num_contexts + i;
|
430
|
+
size_t last_histogram_ix = self->last_histogram_ix_[j] + i;
|
431
|
+
combined_histo[jx] = histograms[curr_histo_ix];
|
432
|
+
HistogramAddHistogramLiteral(&combined_histo[jx],
|
433
|
+
&histograms[last_histogram_ix]);
|
434
|
+
combined_entropy[jx] = BitsEntropy(
|
435
|
+
&combined_histo[jx].data_[0], self->alphabet_size_);
|
436
|
+
diff[j] += combined_entropy[jx] - entropy[i] - last_entropy[jx];
|
437
|
+
}
|
438
|
+
}
|
439
|
+
|
440
|
+
if (split->num_types < self->max_block_types_ &&
|
441
|
+
diff[0] > self->split_threshold_ &&
|
442
|
+
diff[1] > self->split_threshold_) {
|
443
|
+
/* Create new block. */
|
444
|
+
split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
|
445
|
+
split->types[self->num_blocks_] = (uint8_t)split->num_types;
|
446
|
+
self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
|
447
|
+
self->last_histogram_ix_[0] = split->num_types * num_contexts;
|
448
|
+
for (i = 0; i < num_contexts; ++i) {
|
449
|
+
last_entropy[num_contexts + i] = last_entropy[i];
|
450
|
+
last_entropy[i] = entropy[i];
|
451
|
+
}
|
452
|
+
++self->num_blocks_;
|
453
|
+
++split->num_types;
|
454
|
+
self->curr_histogram_ix_ += num_contexts;
|
455
|
+
if (self->curr_histogram_ix_ < *self->histograms_size_) {
|
456
|
+
ClearHistogramsLiteral(
|
457
|
+
&self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
|
458
|
+
}
|
459
|
+
self->block_size_ = 0;
|
460
|
+
self->merge_last_count_ = 0;
|
461
|
+
self->target_block_size_ = self->min_block_size_;
|
462
|
+
} else if (diff[1] < diff[0] - 20.0) {
|
463
|
+
/* Combine this block with second last block. */
|
464
|
+
split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
|
465
|
+
split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
|
466
|
+
BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
|
467
|
+
for (i = 0; i < num_contexts; ++i) {
|
468
|
+
histograms[self->last_histogram_ix_[0] + i] =
|
469
|
+
combined_histo[num_contexts + i];
|
470
|
+
last_entropy[num_contexts + i] = last_entropy[i];
|
471
|
+
last_entropy[i] = combined_entropy[num_contexts + i];
|
472
|
+
HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
|
473
|
+
}
|
474
|
+
++self->num_blocks_;
|
475
|
+
self->block_size_ = 0;
|
476
|
+
self->merge_last_count_ = 0;
|
477
|
+
self->target_block_size_ = self->min_block_size_;
|
478
|
+
} else {
|
479
|
+
/* Combine this block with last block. */
|
480
|
+
split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
|
481
|
+
for (i = 0; i < num_contexts; ++i) {
|
482
|
+
histograms[self->last_histogram_ix_[0] + i] = combined_histo[i];
|
483
|
+
last_entropy[i] = combined_entropy[i];
|
484
|
+
if (split->num_types == 1) {
|
485
|
+
last_entropy[num_contexts + i] = last_entropy[i];
|
486
|
+
}
|
487
|
+
HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
|
488
|
+
}
|
489
|
+
self->block_size_ = 0;
|
490
|
+
if (++self->merge_last_count_ > 1) {
|
491
|
+
self->target_block_size_ += self->min_block_size_;
|
492
|
+
}
|
493
|
+
}
|
494
|
+
BROTLI_FREE(m, combined_histo);
|
495
|
+
}
|
496
|
+
if (is_final) {
|
497
|
+
*self->histograms_size_ = split->num_types * num_contexts;
|
498
|
+
split->num_blocks = self->num_blocks_;
|
499
|
+
}
|
500
|
+
}
|
501
|
+
|
502
|
+
/* Adds the next symbol to the current block type and context. When the
|
503
|
+
current block reaches the target size, decides on merging the block. */
|
504
|
+
static void ContextBlockSplitterAddSymbol(
|
505
|
+
ContextBlockSplitter* self, MemoryManager* m,
|
506
|
+
size_t symbol, size_t context) {
|
507
|
+
HistogramAddLiteral(&self->histograms_[self->curr_histogram_ix_ + context],
|
508
|
+
symbol);
|
509
|
+
++self->block_size_;
|
510
|
+
if (self->block_size_ == self->target_block_size_) {
|
511
|
+
ContextBlockSplitterFinishBlock(self, m, /* is_final = */ BROTLI_FALSE);
|
512
|
+
if (BROTLI_IS_OOM(m)) return;
|
513
|
+
}
|
514
|
+
}
|
515
|
+
|
516
|
+
static void MapStaticContexts(MemoryManager* m,
|
517
|
+
size_t num_contexts,
|
518
|
+
const uint32_t* static_context_map,
|
519
|
+
MetaBlockSplit* mb) {
|
520
|
+
size_t i;
|
521
|
+
BROTLI_DCHECK(mb->literal_context_map == 0);
|
522
|
+
mb->literal_context_map_size =
|
523
|
+
mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
|
524
|
+
mb->literal_context_map =
|
525
|
+
BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
|
526
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->literal_context_map)) return;
|
527
|
+
|
528
|
+
for (i = 0; i < mb->literal_split.num_types; ++i) {
|
529
|
+
uint32_t offset = (uint32_t)(i * num_contexts);
|
530
|
+
size_t j;
|
531
|
+
for (j = 0; j < (1u << BROTLI_LITERAL_CONTEXT_BITS); ++j) {
|
532
|
+
mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
|
533
|
+
offset + static_context_map[j];
|
534
|
+
}
|
535
|
+
}
|
536
|
+
}
|
537
|
+
|
538
|
+
static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal(
|
539
|
+
MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
|
540
|
+
uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
|
541
|
+
const size_t num_contexts, const uint32_t* static_context_map,
|
542
|
+
const Command* commands, size_t n_commands, MetaBlockSplit* mb) {
|
543
|
+
union {
|
544
|
+
BlockSplitterLiteral plain;
|
545
|
+
ContextBlockSplitter ctx;
|
546
|
+
} lit_blocks;
|
547
|
+
BlockSplitterCommand cmd_blocks;
|
548
|
+
BlockSplitterDistance dist_blocks;
|
549
|
+
size_t num_literals = 0;
|
550
|
+
size_t i;
|
551
|
+
for (i = 0; i < n_commands; ++i) {
|
552
|
+
num_literals += commands[i].insert_len_;
|
553
|
+
}
|
554
|
+
|
555
|
+
if (num_contexts == 1) {
|
556
|
+
InitBlockSplitterLiteral(m, &lit_blocks.plain, 256, 512, 400.0,
|
557
|
+
num_literals, &mb->literal_split, &mb->literal_histograms,
|
558
|
+
&mb->literal_histograms_size);
|
559
|
+
} else {
|
560
|
+
InitContextBlockSplitter(m, &lit_blocks.ctx, 256, num_contexts, 512, 400.0,
|
561
|
+
num_literals, &mb->literal_split, &mb->literal_histograms,
|
562
|
+
&mb->literal_histograms_size);
|
563
|
+
}
|
564
|
+
if (BROTLI_IS_OOM(m)) return;
|
565
|
+
InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024,
|
566
|
+
500.0, n_commands, &mb->command_split, &mb->command_histograms,
|
567
|
+
&mb->command_histograms_size);
|
568
|
+
if (BROTLI_IS_OOM(m)) return;
|
569
|
+
InitBlockSplitterDistance(m, &dist_blocks, 64, 512, 100.0, n_commands,
|
570
|
+
&mb->distance_split, &mb->distance_histograms,
|
571
|
+
&mb->distance_histograms_size);
|
572
|
+
if (BROTLI_IS_OOM(m)) return;
|
573
|
+
|
574
|
+
for (i = 0; i < n_commands; ++i) {
|
575
|
+
const Command cmd = commands[i];
|
576
|
+
size_t j;
|
577
|
+
BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_);
|
578
|
+
for (j = cmd.insert_len_; j != 0; --j) {
|
579
|
+
uint8_t literal = ringbuffer[pos & mask];
|
580
|
+
if (num_contexts == 1) {
|
581
|
+
BlockSplitterAddSymbolLiteral(&lit_blocks.plain, literal);
|
582
|
+
} else {
|
583
|
+
size_t context =
|
584
|
+
BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut);
|
585
|
+
ContextBlockSplitterAddSymbol(&lit_blocks.ctx, m, literal,
|
586
|
+
static_context_map[context]);
|
587
|
+
if (BROTLI_IS_OOM(m)) return;
|
588
|
+
}
|
589
|
+
prev_byte2 = prev_byte;
|
590
|
+
prev_byte = literal;
|
591
|
+
++pos;
|
592
|
+
}
|
593
|
+
pos += CommandCopyLen(&cmd);
|
594
|
+
if (CommandCopyLen(&cmd)) {
|
595
|
+
prev_byte2 = ringbuffer[(pos - 2) & mask];
|
596
|
+
prev_byte = ringbuffer[(pos - 1) & mask];
|
597
|
+
if (cmd.cmd_prefix_ >= 128) {
|
598
|
+
BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_ & 0x3FF);
|
599
|
+
}
|
600
|
+
}
|
601
|
+
}
|
602
|
+
|
603
|
+
if (num_contexts == 1) {
|
604
|
+
BlockSplitterFinishBlockLiteral(
|
605
|
+
&lit_blocks.plain, /* is_final = */ BROTLI_TRUE);
|
606
|
+
} else {
|
607
|
+
ContextBlockSplitterFinishBlock(
|
608
|
+
&lit_blocks.ctx, m, /* is_final = */ BROTLI_TRUE);
|
609
|
+
if (BROTLI_IS_OOM(m)) return;
|
610
|
+
}
|
611
|
+
BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ BROTLI_TRUE);
|
612
|
+
BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ BROTLI_TRUE);
|
613
|
+
|
614
|
+
if (num_contexts > 1) {
|
615
|
+
MapStaticContexts(m, num_contexts, static_context_map, mb);
|
616
|
+
}
|
617
|
+
}
|
618
|
+
|
619
|
+
void BrotliBuildMetaBlockGreedy(MemoryManager* m,
|
620
|
+
const uint8_t* ringbuffer,
|
621
|
+
size_t pos,
|
622
|
+
size_t mask,
|
623
|
+
uint8_t prev_byte,
|
624
|
+
uint8_t prev_byte2,
|
625
|
+
ContextLut literal_context_lut,
|
626
|
+
size_t num_contexts,
|
627
|
+
const uint32_t* static_context_map,
|
628
|
+
const Command* commands,
|
629
|
+
size_t n_commands,
|
630
|
+
MetaBlockSplit* mb) {
|
631
|
+
if (num_contexts == 1) {
|
632
|
+
BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
|
633
|
+
prev_byte2, literal_context_lut, 1, NULL, commands, n_commands, mb);
|
634
|
+
} else {
|
635
|
+
BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
|
636
|
+
prev_byte2, literal_context_lut, num_contexts, static_context_map,
|
637
|
+
commands, n_commands, mb);
|
638
|
+
}
|
639
|
+
}
|
640
|
+
|
641
|
+
void BrotliOptimizeHistograms(uint32_t num_distance_codes,
|
642
|
+
MetaBlockSplit* mb) {
|
643
|
+
uint8_t good_for_rle[BROTLI_NUM_COMMAND_SYMBOLS];
|
644
|
+
size_t i;
|
645
|
+
for (i = 0; i < mb->literal_histograms_size; ++i) {
|
646
|
+
BrotliOptimizeHuffmanCountsForRle(256, mb->literal_histograms[i].data_,
|
647
|
+
good_for_rle);
|
648
|
+
}
|
649
|
+
for (i = 0; i < mb->command_histograms_size; ++i) {
|
650
|
+
BrotliOptimizeHuffmanCountsForRle(BROTLI_NUM_COMMAND_SYMBOLS,
|
651
|
+
mb->command_histograms[i].data_,
|
652
|
+
good_for_rle);
|
653
|
+
}
|
654
|
+
for (i = 0; i < mb->distance_histograms_size; ++i) {
|
655
|
+
BrotliOptimizeHuffmanCountsForRle(num_distance_codes,
|
656
|
+
mb->distance_histograms[i].data_,
|
657
|
+
good_for_rle);
|
658
|
+
}
|
659
|
+
}
|
660
|
+
|
661
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
662
|
+
} /* extern "C" */
|
663
|
+
#endif
|