isomorfeus-ferret 0.12.6 → 0.12.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (97) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -4
  3. data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
  4. data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
  5. data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
  6. data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
  7. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
  8. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
  9. data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
  10. data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
  11. data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
  12. data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
  13. data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
  14. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
  15. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
  16. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
  17. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
  18. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
  19. data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
  20. data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
  21. data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
  22. data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
  23. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
  24. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
  25. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
  26. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
  27. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
  28. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
  29. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
  30. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
  31. data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
  32. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
  33. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
  34. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
  35. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
  36. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
  37. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
  38. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
  39. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
  40. data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
  41. data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
  42. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
  43. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
  44. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
  45. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
  46. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
  47. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
  48. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
  49. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
  50. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
  51. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
  52. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
  53. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
  54. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
  55. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
  56. data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
  57. data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
  58. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
  59. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
  60. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
  61. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
  62. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
  63. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
  64. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
  65. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
  66. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
  67. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
  68. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
  69. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
  70. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
  71. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
  72. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
  73. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
  74. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
  75. data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
  76. data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
  77. data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
  78. data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
  79. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
  80. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
  81. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
  82. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
  83. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
  84. data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
  85. data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
  86. data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
  87. data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
  88. data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
  89. data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
  90. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  91. data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
  92. data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
  93. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
  94. data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
  95. data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
  96. data/lib/isomorfeus/ferret/version.rb +1 -1
  97. metadata +88 -3
@@ -0,0 +1,1927 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Implementation of Brotli compressor. */
8
+
9
+ #include "brotli_encode.h"
10
+
11
+ #include <stdlib.h> /* free, malloc */
12
+ #include <string.h> /* memcpy, memset */
13
+
14
+ #include "brotli_common_constants.h"
15
+ #include "brotli_common_context.h"
16
+ #include "brotli_common_platform.h"
17
+ #include "brotli_common_version.h"
18
+ #include "brotli_enc_backward_references.h"
19
+ #include "brotli_enc_backward_references_hq.h"
20
+ #include "brotli_enc_bit_cost.h"
21
+ #include "brotli_enc_brotli_bit_stream.h"
22
+ #include "brotli_enc_compress_fragment.h"
23
+ #include "brotli_enc_compress_fragment_two_pass.h"
24
+ #include "brotli_enc_encoder_dict.h"
25
+ #include "brotli_enc_entropy_encode.h"
26
+ #include "brotli_enc_fast_log.h"
27
+ #include "brotli_enc_hash.h"
28
+ #include "brotli_enc_histogram.h"
29
+ #include "brotli_enc_memory.h"
30
+ #include "brotli_enc_metablock.h"
31
+ #include "brotli_enc_prefix.h"
32
+ #include "brotli_enc_quality.h"
33
+ #include "brotli_enc_ringbuffer.h"
34
+ #include "brotli_enc_utf8_util.h"
35
+ #include "brotli_enc_write_bits.h"
36
+
37
+ #if defined(__cplusplus) || defined(c_plusplus)
38
+ extern "C" {
39
+ #endif
40
+
41
+ #define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));
42
+
43
+ typedef enum BrotliEncoderStreamState {
44
+ /* Default state. */
45
+ BROTLI_STREAM_PROCESSING = 0,
46
+ /* Intermediate state; after next block is emitted, byte-padding should be
47
+ performed before getting back to default state. */
48
+ BROTLI_STREAM_FLUSH_REQUESTED = 1,
49
+ /* Last metablock was produced; no more input is acceptable. */
50
+ BROTLI_STREAM_FINISHED = 2,
51
+ /* Flushing compressed block and writing meta-data block header. */
52
+ BROTLI_STREAM_METADATA_HEAD = 3,
53
+ /* Writing metadata block body. */
54
+ BROTLI_STREAM_METADATA_BODY = 4
55
+ } BrotliEncoderStreamState;
56
+
57
+ typedef enum BrotliEncoderFlintState {
58
+ BROTLI_FLINT_NEEDS_2_BYTES = 2,
59
+ BROTLI_FLINT_NEEDS_1_BYTE = 1,
60
+ BROTLI_FLINT_WAITING_FOR_PROCESSING = 0,
61
+ BROTLI_FLINT_WAITING_FOR_FLUSHING = -1,
62
+ BROTLI_FLINT_DONE = -2
63
+ } BrotliEncoderFlintState;
64
+
65
+ typedef struct BrotliEncoderStateStruct {
66
+ BrotliEncoderParams params;
67
+
68
+ MemoryManager memory_manager_;
69
+
70
+ uint64_t input_pos_;
71
+ RingBuffer ringbuffer_;
72
+ size_t cmd_alloc_size_;
73
+ Command* commands_;
74
+ size_t num_commands_;
75
+ size_t num_literals_;
76
+ size_t last_insert_len_;
77
+ uint64_t last_flush_pos_;
78
+ uint64_t last_processed_pos_;
79
+ int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES];
80
+ int saved_dist_cache_[4];
81
+ uint16_t last_bytes_;
82
+ uint8_t last_bytes_bits_;
83
+ /* "Flint" is a tiny uncompressed block emitted before the continuation
84
+ block to unwire literal context from previous data. Despite being int8_t,
85
+ field is actually BrotliEncoderFlintState enum. */
86
+ int8_t flint_;
87
+ uint8_t prev_byte_;
88
+ uint8_t prev_byte2_;
89
+ size_t storage_size_;
90
+ uint8_t* storage_;
91
+
92
+ Hasher hasher_;
93
+
94
+ /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */
95
+ int small_table_[1 << 10]; /* 4KiB */
96
+ int* large_table_; /* Allocated only when needed */
97
+ size_t large_table_size_;
98
+ /* Command and distance prefix codes (each 64 symbols, stored back-to-back)
99
+ used for the next block in FAST_ONE_PASS_COMPRESSION_QUALITY. The command
100
+ prefix code is over a smaller alphabet with the following 64 symbols:
101
+ 0 - 15: insert length code 0, copy length code 0 - 15, same distance
102
+ 16 - 39: insert length code 0, copy length code 0 - 23
103
+ 40 - 63: insert length code 0 - 23, copy length code 0
104
+ Note that symbols 16 and 40 represent the same code in the full alphabet,
105
+ but we do not use either of them in FAST_ONE_PASS_COMPRESSION_QUALITY. */
106
+ uint8_t cmd_depths_[128];
107
+ uint16_t cmd_bits_[128];
108
+ /* The compressed form of the command and distance prefix codes for the next
109
+ block in FAST_ONE_PASS_COMPRESSION_QUALITY. */
110
+ uint8_t cmd_code_[512];
111
+ size_t cmd_code_numbits_;
112
+ /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */
113
+ uint32_t* command_buf_;
114
+ uint8_t* literal_buf_;
115
+
116
+ uint8_t* next_out_;
117
+ size_t available_out_;
118
+ size_t total_out_;
119
+ /* Temporary buffer for padding flush bits or metadata block header / body. */
120
+ union {
121
+ uint64_t u64[2];
122
+ uint8_t u8[16];
123
+ } tiny_buf_;
124
+ uint32_t remaining_metadata_bytes_;
125
+ BrotliEncoderStreamState stream_state_;
126
+
127
+ BROTLI_BOOL is_last_block_emitted_;
128
+ BROTLI_BOOL is_initialized_;
129
+ } BrotliEncoderStateStruct;
130
+
131
+ static size_t InputBlockSize(BrotliEncoderState* s) {
132
+ return (size_t)1 << s->params.lgblock;
133
+ }
134
+
135
+ static uint64_t UnprocessedInputSize(BrotliEncoderState* s) {
136
+ return s->input_pos_ - s->last_processed_pos_;
137
+ }
138
+
139
+ static size_t RemainingInputBlockSize(BrotliEncoderState* s) {
140
+ const uint64_t delta = UnprocessedInputSize(s);
141
+ size_t block_size = InputBlockSize(s);
142
+ if (delta >= block_size) return 0;
143
+ return block_size - (size_t)delta;
144
+ }
145
+
146
+ BROTLI_BOOL BrotliEncoderSetParameter(
147
+ BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value) {
148
+ /* Changing parameters on the fly is not implemented yet. */
149
+ if (state->is_initialized_) return BROTLI_FALSE;
150
+ /* TODO: Validate/clamp parameters here. */
151
+ switch (p) {
152
+ case BROTLI_PARAM_MODE:
153
+ state->params.mode = (BrotliEncoderMode)value;
154
+ return BROTLI_TRUE;
155
+
156
+ case BROTLI_PARAM_QUALITY:
157
+ state->params.quality = (int)value;
158
+ return BROTLI_TRUE;
159
+
160
+ case BROTLI_PARAM_LGWIN:
161
+ state->params.lgwin = (int)value;
162
+ return BROTLI_TRUE;
163
+
164
+ case BROTLI_PARAM_LGBLOCK:
165
+ state->params.lgblock = (int)value;
166
+ return BROTLI_TRUE;
167
+
168
+ case BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING:
169
+ if ((value != 0) && (value != 1)) return BROTLI_FALSE;
170
+ state->params.disable_literal_context_modeling = TO_BROTLI_BOOL(!!value);
171
+ return BROTLI_TRUE;
172
+
173
+ case BROTLI_PARAM_SIZE_HINT:
174
+ state->params.size_hint = value;
175
+ return BROTLI_TRUE;
176
+
177
+ case BROTLI_PARAM_LARGE_WINDOW:
178
+ state->params.large_window = TO_BROTLI_BOOL(!!value);
179
+ return BROTLI_TRUE;
180
+
181
+ case BROTLI_PARAM_NPOSTFIX:
182
+ state->params.dist.distance_postfix_bits = value;
183
+ return BROTLI_TRUE;
184
+
185
+ case BROTLI_PARAM_NDIRECT:
186
+ state->params.dist.num_direct_distance_codes = value;
187
+ return BROTLI_TRUE;
188
+
189
+ case BROTLI_PARAM_STREAM_OFFSET:
190
+ if (value > (1u << 30)) return BROTLI_FALSE;
191
+ state->params.stream_offset = value;
192
+ return BROTLI_TRUE;
193
+
194
+ default: return BROTLI_FALSE;
195
+ }
196
+ }
197
+
198
+ /* Wraps 64-bit input position to 32-bit ring-buffer position preserving
199
+ "not-a-first-lap" feature. */
200
+ static uint32_t WrapPosition(uint64_t position) {
201
+ uint32_t result = (uint32_t)position;
202
+ uint64_t gb = position >> 30;
203
+ if (gb > 2) {
204
+ /* Wrap every 2GiB; The first 3GB are continuous. */
205
+ result = (result & ((1u << 30) - 1)) | ((uint32_t)((gb - 1) & 1) + 1) << 30;
206
+ }
207
+ return result;
208
+ }
209
+
210
+ static uint8_t* GetBrotliStorage(BrotliEncoderState* s, size_t size) {
211
+ MemoryManager* m = &s->memory_manager_;
212
+ if (s->storage_size_ < size) {
213
+ BROTLI_FREE(m, s->storage_);
214
+ s->storage_ = BROTLI_ALLOC(m, uint8_t, size);
215
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->storage_)) return NULL;
216
+ s->storage_size_ = size;
217
+ }
218
+ return s->storage_;
219
+ }
220
+
221
+ static size_t HashTableSize(size_t max_table_size, size_t input_size) {
222
+ size_t htsize = 256;
223
+ while (htsize < max_table_size && htsize < input_size) {
224
+ htsize <<= 1;
225
+ }
226
+ return htsize;
227
+ }
228
+
229
+ static int* GetHashTable(BrotliEncoderState* s, int quality,
230
+ size_t input_size, size_t* table_size) {
231
+ /* Use smaller hash table when input.size() is smaller, since we
232
+ fill the table, incurring O(hash table size) overhead for
233
+ compression, and if the input is short, we won't need that
234
+ many hash table entries anyway. */
235
+ MemoryManager* m = &s->memory_manager_;
236
+ const size_t max_table_size = MaxHashTableSize(quality);
237
+ size_t htsize = HashTableSize(max_table_size, input_size);
238
+ int* table;
239
+ BROTLI_DCHECK(max_table_size >= 256);
240
+ if (quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
241
+ /* Only odd shifts are supported by fast-one-pass. */
242
+ if ((htsize & 0xAAAAA) == 0) {
243
+ htsize <<= 1;
244
+ }
245
+ }
246
+
247
+ if (htsize <= sizeof(s->small_table_) / sizeof(s->small_table_[0])) {
248
+ table = s->small_table_;
249
+ } else {
250
+ if (htsize > s->large_table_size_) {
251
+ s->large_table_size_ = htsize;
252
+ BROTLI_FREE(m, s->large_table_);
253
+ s->large_table_ = BROTLI_ALLOC(m, int, htsize);
254
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->large_table_)) return 0;
255
+ }
256
+ table = s->large_table_;
257
+ }
258
+
259
+ *table_size = htsize;
260
+ memset(table, 0, htsize * sizeof(*table));
261
+ return table;
262
+ }
263
+
264
+ static void EncodeWindowBits(int lgwin, BROTLI_BOOL large_window,
265
+ uint16_t* last_bytes, uint8_t* last_bytes_bits) {
266
+ if (large_window) {
267
+ *last_bytes = (uint16_t)(((lgwin & 0x3F) << 8) | 0x11);
268
+ *last_bytes_bits = 14;
269
+ } else {
270
+ if (lgwin == 16) {
271
+ *last_bytes = 0;
272
+ *last_bytes_bits = 1;
273
+ } else if (lgwin == 17) {
274
+ *last_bytes = 1;
275
+ *last_bytes_bits = 7;
276
+ } else if (lgwin > 17) {
277
+ *last_bytes = (uint16_t)(((lgwin - 17) << 1) | 0x01);
278
+ *last_bytes_bits = 4;
279
+ } else {
280
+ *last_bytes = (uint16_t)(((lgwin - 8) << 4) | 0x01);
281
+ *last_bytes_bits = 7;
282
+ }
283
+ }
284
+ }
285
+
286
+ /* Initializes the command and distance prefix codes for the first block. */
287
+ static void InitCommandPrefixCodes(uint8_t cmd_depths[128],
288
+ uint16_t cmd_bits[128],
289
+ uint8_t cmd_code[512],
290
+ size_t* cmd_code_numbits) {
291
+ static const uint8_t kDefaultCommandDepths[128] = {
292
+ 0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
293
+ 0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7,
294
+ 7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6,
295
+ 7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
296
+ 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
297
+ 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,
298
+ 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10,
299
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
300
+ };
301
+ static const uint16_t kDefaultCommandBits[128] = {
302
+ 0, 0, 8, 9, 3, 35, 7, 71,
303
+ 39, 103, 23, 47, 175, 111, 239, 31,
304
+ 0, 0, 0, 4, 12, 2, 10, 6,
305
+ 13, 29, 11, 43, 27, 59, 87, 55,
306
+ 15, 79, 319, 831, 191, 703, 447, 959,
307
+ 0, 14, 1, 25, 5, 21, 19, 51,
308
+ 119, 159, 95, 223, 479, 991, 63, 575,
309
+ 127, 639, 383, 895, 255, 767, 511, 1023,
310
+ 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
311
+ 27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12,
312
+ 2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255,
313
+ 767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095,
314
+ };
315
+ static const uint8_t kDefaultCommandCode[] = {
316
+ 0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,
317
+ 0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,
318
+ 0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa,
319
+ 0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94,
320
+ 0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00,
321
+ };
322
+ static const size_t kDefaultCommandCodeNumBits = 448;
323
+ COPY_ARRAY(cmd_depths, kDefaultCommandDepths);
324
+ COPY_ARRAY(cmd_bits, kDefaultCommandBits);
325
+
326
+ /* Initialize the pre-compressed form of the command and distance prefix
327
+ codes. */
328
+ COPY_ARRAY(cmd_code, kDefaultCommandCode);
329
+ *cmd_code_numbits = kDefaultCommandCodeNumBits;
330
+ }
331
+
332
+ /* Decide about the context map based on the ability of the prediction
333
+ ability of the previous byte UTF8-prefix on the next byte. The
334
+ prediction ability is calculated as Shannon entropy. Here we need
335
+ Shannon entropy instead of 'BitsEntropy' since the prefix will be
336
+ encoded with the remaining 6 bits of the following byte, and
337
+ BitsEntropy will assume that symbol to be stored alone using Huffman
338
+ coding. */
339
+ static void ChooseContextMap(int quality,
340
+ uint32_t* bigram_histo,
341
+ size_t* num_literal_contexts,
342
+ const uint32_t** literal_context_map) {
343
+ static const uint32_t kStaticContextMapContinuation[64] = {
344
+ 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
345
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
346
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
347
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
348
+ };
349
+ static const uint32_t kStaticContextMapSimpleUTF8[64] = {
350
+ 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
351
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
352
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
353
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
354
+ };
355
+
356
+ uint32_t monogram_histo[3] = { 0 };
357
+ uint32_t two_prefix_histo[6] = { 0 };
358
+ size_t total;
359
+ size_t i;
360
+ size_t dummy;
361
+ double entropy[4];
362
+ for (i = 0; i < 9; ++i) {
363
+ monogram_histo[i % 3] += bigram_histo[i];
364
+ two_prefix_histo[i % 6] += bigram_histo[i];
365
+ }
366
+ entropy[1] = ShannonEntropy(monogram_histo, 3, &dummy);
367
+ entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
368
+ ShannonEntropy(two_prefix_histo + 3, 3, &dummy));
369
+ entropy[3] = 0;
370
+ for (i = 0; i < 3; ++i) {
371
+ entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &dummy);
372
+ }
373
+
374
+ total = monogram_histo[0] + monogram_histo[1] + monogram_histo[2];
375
+ BROTLI_DCHECK(total != 0);
376
+ entropy[0] = 1.0 / (double)total;
377
+ entropy[1] *= entropy[0];
378
+ entropy[2] *= entropy[0];
379
+ entropy[3] *= entropy[0];
380
+
381
+ if (quality < MIN_QUALITY_FOR_HQ_CONTEXT_MODELING) {
382
+ /* 3 context models is a bit slower, don't use it at lower qualities. */
383
+ entropy[3] = entropy[1] * 10;
384
+ }
385
+ /* If expected savings by symbol are less than 0.2 bits, skip the
386
+ context modeling -- in exchange for faster decoding speed. */
387
+ if (entropy[1] - entropy[2] < 0.2 &&
388
+ entropy[1] - entropy[3] < 0.2) {
389
+ *num_literal_contexts = 1;
390
+ } else if (entropy[2] - entropy[3] < 0.02) {
391
+ *num_literal_contexts = 2;
392
+ *literal_context_map = kStaticContextMapSimpleUTF8;
393
+ } else {
394
+ *num_literal_contexts = 3;
395
+ *literal_context_map = kStaticContextMapContinuation;
396
+ }
397
+ }
398
+
399
+ /* Decide if we want to use a more complex static context map containing 13
400
+ context values, based on the entropy reduction of histograms over the
401
+ first 5 bits of literals. */
402
+ static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,
403
+ size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
404
+ size_t* num_literal_contexts, const uint32_t** literal_context_map) {
405
+ static const uint32_t kStaticContextMapComplexUTF8[64] = {
406
+ 11, 11, 12, 12, /* 0 special */
407
+ 0, 0, 0, 0, /* 4 lf */
408
+ 1, 1, 9, 9, /* 8 space */
409
+ 2, 2, 2, 2, /* !, first after space/lf and after something else. */
410
+ 1, 1, 1, 1, /* " */
411
+ 8, 3, 3, 3, /* % */
412
+ 1, 1, 1, 1, /* ({[ */
413
+ 2, 2, 2, 2, /* }]) */
414
+ 8, 4, 4, 4, /* :; */
415
+ 8, 7, 4, 4, /* . */
416
+ 8, 0, 0, 0, /* > */
417
+ 3, 3, 3, 3, /* [0..9] */
418
+ 5, 5, 10, 5, /* [A-Z] */
419
+ 5, 5, 10, 5,
420
+ 6, 6, 6, 6, /* [a-z] */
421
+ 6, 6, 6, 6,
422
+ };
423
+ BROTLI_UNUSED(quality);
424
+ /* Try the more complex static context map only for long data. */
425
+ if (size_hint < (1 << 20)) {
426
+ return BROTLI_FALSE;
427
+ } else {
428
+ const size_t end_pos = start_pos + length;
429
+ /* To make entropy calculations faster and to fit on the stack, we collect
430
+ histograms over the 5 most significant bits of literals. One histogram
431
+ without context and 13 additional histograms for each context value. */
432
+ uint32_t combined_histo[32] = { 0 };
433
+ uint32_t context_histo[13][32] = { { 0 } };
434
+ uint32_t total = 0;
435
+ double entropy[3];
436
+ size_t dummy;
437
+ size_t i;
438
+ ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8);
439
+ for (; start_pos + 64 <= end_pos; start_pos += 4096) {
440
+ const size_t stride_end_pos = start_pos + 64;
441
+ uint8_t prev2 = input[start_pos & mask];
442
+ uint8_t prev1 = input[(start_pos + 1) & mask];
443
+ size_t pos;
444
+ /* To make the analysis of the data faster we only examine 64 byte long
445
+ strides at every 4kB intervals. */
446
+ for (pos = start_pos + 2; pos < stride_end_pos; ++pos) {
447
+ const uint8_t literal = input[pos & mask];
448
+ const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[
449
+ BROTLI_CONTEXT(prev1, prev2, utf8_lut)];
450
+ ++total;
451
+ ++combined_histo[literal >> 3];
452
+ ++context_histo[context][literal >> 3];
453
+ prev2 = prev1;
454
+ prev1 = literal;
455
+ }
456
+ }
457
+ entropy[1] = ShannonEntropy(combined_histo, 32, &dummy);
458
+ entropy[2] = 0;
459
+ for (i = 0; i < 13; ++i) {
460
+ entropy[2] += ShannonEntropy(&context_histo[i][0], 32, &dummy);
461
+ }
462
+ entropy[0] = 1.0 / (double)total;
463
+ entropy[1] *= entropy[0];
464
+ entropy[2] *= entropy[0];
465
+ /* The triggering heuristics below were tuned by compressing the individual
466
+ files of the silesia corpus. If we skip this kind of context modeling
467
+ for not very well compressible input (i.e. entropy using context modeling
468
+ is 60% of maximal entropy) or if expected savings by symbol are less
469
+ than 0.2 bits, then in every case when it triggers, the final compression
470
+ ratio is improved. Note however that this heuristics might be too strict
471
+ for some cases and could be tuned further. */
472
+ if (entropy[2] > 3.0 || entropy[1] - entropy[2] < 0.2) {
473
+ return BROTLI_FALSE;
474
+ } else {
475
+ *num_literal_contexts = 13;
476
+ *literal_context_map = kStaticContextMapComplexUTF8;
477
+ return BROTLI_TRUE;
478
+ }
479
+ }
480
+ }
481
+
482
+ static void DecideOverLiteralContextModeling(const uint8_t* input,
483
+ size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
484
+ size_t* num_literal_contexts, const uint32_t** literal_context_map) {
485
+ if (quality < MIN_QUALITY_FOR_CONTEXT_MODELING || length < 64) {
486
+ return;
487
+ } else if (ShouldUseComplexStaticContextMap(
488
+ input, start_pos, length, mask, quality, size_hint,
489
+ num_literal_contexts, literal_context_map)) {
490
+ /* Context map was already set, nothing else to do. */
491
+ } else {
492
+ /* Gather bi-gram data of the UTF8 byte prefixes. To make the analysis of
493
+ UTF8 data faster we only examine 64 byte long strides at every 4kB
494
+ intervals. */
495
+ const size_t end_pos = start_pos + length;
496
+ uint32_t bigram_prefix_histo[9] = { 0 };
497
+ for (; start_pos + 64 <= end_pos; start_pos += 4096) {
498
+ static const int lut[4] = { 0, 0, 1, 2 };
499
+ const size_t stride_end_pos = start_pos + 64;
500
+ int prev = lut[input[start_pos & mask] >> 6] * 3;
501
+ size_t pos;
502
+ for (pos = start_pos + 1; pos < stride_end_pos; ++pos) {
503
+ const uint8_t literal = input[pos & mask];
504
+ ++bigram_prefix_histo[prev + lut[literal >> 6]];
505
+ prev = lut[literal >> 6] * 3;
506
+ }
507
+ }
508
+ ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
509
+ literal_context_map);
510
+ }
511
+ }
512
+
513
+ static BROTLI_BOOL ShouldCompress(
514
+ const uint8_t* data, const size_t mask, const uint64_t last_flush_pos,
515
+ const size_t bytes, const size_t num_literals, const size_t num_commands) {
516
+ /* TODO: find more precise minimal block overhead. */
517
+ if (bytes <= 2) return BROTLI_FALSE;
518
+ if (num_commands < (bytes >> 8) + 2) {
519
+ if ((double)num_literals > 0.99 * (double)bytes) {
520
+ uint32_t literal_histo[256] = { 0 };
521
+ static const uint32_t kSampleRate = 13;
522
+ static const double kMinEntropy = 7.92;
523
+ const double bit_cost_threshold =
524
+ (double)bytes * kMinEntropy / kSampleRate;
525
+ size_t t = (bytes + kSampleRate - 1) / kSampleRate;
526
+ uint32_t pos = (uint32_t)last_flush_pos;
527
+ size_t i;
528
+ for (i = 0; i < t; i++) {
529
+ ++literal_histo[data[pos & mask]];
530
+ pos += kSampleRate;
531
+ }
532
+ if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
533
+ return BROTLI_FALSE;
534
+ }
535
+ }
536
+ }
537
+ return BROTLI_TRUE;
538
+ }
539
+
540
+ /* Chooses the literal context mode for a metablock */
541
+ static ContextType ChooseContextMode(const BrotliEncoderParams* params,
542
+ const uint8_t* data, const size_t pos, const size_t mask,
543
+ const size_t length) {
544
+ /* We only do the computation for the option of something else than
545
+ CONTEXT_UTF8 for the highest qualities */
546
+ if (params->quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING &&
547
+ !BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio)) {
548
+ return CONTEXT_SIGNED;
549
+ }
550
+ return CONTEXT_UTF8;
551
+ }
552
+
553
+ static void WriteMetaBlockInternal(MemoryManager* m,
554
+ const uint8_t* data,
555
+ const size_t mask,
556
+ const uint64_t last_flush_pos,
557
+ const size_t bytes,
558
+ const BROTLI_BOOL is_last,
559
+ ContextType literal_context_mode,
560
+ const BrotliEncoderParams* params,
561
+ const uint8_t prev_byte,
562
+ const uint8_t prev_byte2,
563
+ const size_t num_literals,
564
+ const size_t num_commands,
565
+ Command* commands,
566
+ const int* saved_dist_cache,
567
+ int* dist_cache,
568
+ size_t* storage_ix,
569
+ uint8_t* storage) {
570
+ const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos);
571
+ uint16_t last_bytes;
572
+ uint8_t last_bytes_bits;
573
+ ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
574
+ BrotliEncoderParams block_params = *params;
575
+
576
+ if (bytes == 0) {
577
+ /* Write the ISLAST and ISEMPTY bits. */
578
+ BrotliWriteBits(2, 3, storage_ix, storage);
579
+ *storage_ix = (*storage_ix + 7u) & ~7u;
580
+ return;
581
+ }
582
+
583
+ if (!ShouldCompress(data, mask, last_flush_pos, bytes,
584
+ num_literals, num_commands)) {
585
+ /* Restore the distance cache, as its last update by
586
+ CreateBackwardReferences is now unused. */
587
+ memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
588
+ BrotliStoreUncompressedMetaBlock(is_last, data,
589
+ wrapped_last_flush_pos, mask, bytes,
590
+ storage_ix, storage);
591
+ return;
592
+ }
593
+
594
+ BROTLI_DCHECK(*storage_ix <= 14);
595
+ last_bytes = (uint16_t)((storage[1] << 8) | storage[0]);
596
+ last_bytes_bits = (uint8_t)(*storage_ix);
597
+ if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
598
+ BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos,
599
+ bytes, mask, is_last, params,
600
+ commands, num_commands,
601
+ storage_ix, storage);
602
+ if (BROTLI_IS_OOM(m)) return;
603
+ } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
604
+ BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos,
605
+ bytes, mask, is_last, params,
606
+ commands, num_commands,
607
+ storage_ix, storage);
608
+ if (BROTLI_IS_OOM(m)) return;
609
+ } else {
610
+ MetaBlockSplit mb;
611
+ InitMetaBlockSplit(&mb);
612
+ if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
613
+ size_t num_literal_contexts = 1;
614
+ const uint32_t* literal_context_map = NULL;
615
+ if (!params->disable_literal_context_modeling) {
616
+ DecideOverLiteralContextModeling(
617
+ data, wrapped_last_flush_pos, bytes, mask, params->quality,
618
+ params->size_hint, &num_literal_contexts,
619
+ &literal_context_map);
620
+ }
621
+ BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,
622
+ prev_byte, prev_byte2, literal_context_lut, num_literal_contexts,
623
+ literal_context_map, commands, num_commands, &mb);
624
+ if (BROTLI_IS_OOM(m)) return;
625
+ } else {
626
+ BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, &block_params,
627
+ prev_byte, prev_byte2,
628
+ commands, num_commands,
629
+ literal_context_mode,
630
+ &mb);
631
+ if (BROTLI_IS_OOM(m)) return;
632
+ }
633
+ if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) {
634
+ /* The number of distance symbols effectively used for distance
635
+ histograms. It might be less than distance alphabet size
636
+ for "Large Window Brotli" (32-bit). */
637
+ BrotliOptimizeHistograms(block_params.dist.alphabet_size_limit, &mb);
638
+ }
639
+ BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask,
640
+ prev_byte, prev_byte2,
641
+ is_last,
642
+ &block_params,
643
+ literal_context_mode,
644
+ commands, num_commands,
645
+ &mb,
646
+ storage_ix, storage);
647
+ if (BROTLI_IS_OOM(m)) return;
648
+ DestroyMetaBlockSplit(m, &mb);
649
+ }
650
+ if (bytes + 4 < (*storage_ix >> 3)) {
651
+ /* Restore the distance cache and last byte. */
652
+ memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
653
+ storage[0] = (uint8_t)last_bytes;
654
+ storage[1] = (uint8_t)(last_bytes >> 8);
655
+ *storage_ix = last_bytes_bits;
656
+ BrotliStoreUncompressedMetaBlock(is_last, data,
657
+ wrapped_last_flush_pos, mask,
658
+ bytes, storage_ix, storage);
659
+ }
660
+ }
661
+
662
+ static void ChooseDistanceParams(BrotliEncoderParams* params) {
663
+ uint32_t distance_postfix_bits = 0;
664
+ uint32_t num_direct_distance_codes = 0;
665
+
666
+ if (params->quality >= MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS) {
667
+ uint32_t ndirect_msb;
668
+ if (params->mode == BROTLI_MODE_FONT) {
669
+ distance_postfix_bits = 1;
670
+ num_direct_distance_codes = 12;
671
+ } else {
672
+ distance_postfix_bits = params->dist.distance_postfix_bits;
673
+ num_direct_distance_codes = params->dist.num_direct_distance_codes;
674
+ }
675
+ ndirect_msb = (num_direct_distance_codes >> distance_postfix_bits) & 0x0F;
676
+ if (distance_postfix_bits > BROTLI_MAX_NPOSTFIX ||
677
+ num_direct_distance_codes > BROTLI_MAX_NDIRECT ||
678
+ (ndirect_msb << distance_postfix_bits) != num_direct_distance_codes) {
679
+ distance_postfix_bits = 0;
680
+ num_direct_distance_codes = 0;
681
+ }
682
+ }
683
+
684
+ BrotliInitDistanceParams(
685
+ params, distance_postfix_bits, num_direct_distance_codes);
686
+ }
687
+
688
+ static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
689
+ if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE;
690
+ if (s->is_initialized_) return BROTLI_TRUE;
691
+
692
+ s->last_bytes_bits_ = 0;
693
+ s->last_bytes_ = 0;
694
+ s->flint_ = BROTLI_FLINT_DONE;
695
+ s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
696
+
697
+ SanitizeParams(&s->params);
698
+ s->params.lgblock = ComputeLgBlock(&s->params);
699
+ ChooseDistanceParams(&s->params);
700
+
701
+ if (s->params.stream_offset != 0) {
702
+ s->flint_ = BROTLI_FLINT_NEEDS_2_BYTES;
703
+ /* Poison the distance cache. -16 +- 3 is still less than zero (invalid). */
704
+ s->dist_cache_[0] = -16;
705
+ s->dist_cache_[1] = -16;
706
+ s->dist_cache_[2] = -16;
707
+ s->dist_cache_[3] = -16;
708
+ memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
709
+ }
710
+
711
+ RingBufferSetup(&s->params, &s->ringbuffer_);
712
+
713
+ /* Initialize last byte with stream header. */
714
+ {
715
+ int lgwin = s->params.lgwin;
716
+ if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
717
+ s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
718
+ lgwin = BROTLI_MAX(int, lgwin, 18);
719
+ }
720
+ if (s->params.stream_offset == 0) {
721
+ EncodeWindowBits(lgwin, s->params.large_window,
722
+ &s->last_bytes_, &s->last_bytes_bits_);
723
+ } else {
724
+ /* Bigger values have the same effect, but could cause overflows. */
725
+ s->params.stream_offset = BROTLI_MIN(size_t,
726
+ s->params.stream_offset, BROTLI_MAX_BACKWARD_LIMIT(lgwin));
727
+ }
728
+ }
729
+
730
+ if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
731
+ InitCommandPrefixCodes(s->cmd_depths_, s->cmd_bits_,
732
+ s->cmd_code_, &s->cmd_code_numbits_);
733
+ }
734
+
735
+ s->is_initialized_ = BROTLI_TRUE;
736
+ return BROTLI_TRUE;
737
+ }
738
+
739
+ static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
740
+ params->mode = BROTLI_DEFAULT_MODE;
741
+ params->large_window = BROTLI_FALSE;
742
+ params->quality = BROTLI_DEFAULT_QUALITY;
743
+ params->lgwin = BROTLI_DEFAULT_WINDOW;
744
+ params->lgblock = 0;
745
+ params->stream_offset = 0;
746
+ params->size_hint = 0;
747
+ params->disable_literal_context_modeling = BROTLI_FALSE;
748
+ BrotliInitEncoderDictionary(&params->dictionary);
749
+ params->dist.distance_postfix_bits = 0;
750
+ params->dist.num_direct_distance_codes = 0;
751
+ params->dist.alphabet_size_max =
752
+ BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS);
753
+ params->dist.alphabet_size_limit = params->dist.alphabet_size_max;
754
+ params->dist.max_distance = BROTLI_MAX_DISTANCE;
755
+ }
756
+
757
+ static void BrotliEncoderInitState(BrotliEncoderState* s) {
758
+ BrotliEncoderInitParams(&s->params);
759
+ s->input_pos_ = 0;
760
+ s->num_commands_ = 0;
761
+ s->num_literals_ = 0;
762
+ s->last_insert_len_ = 0;
763
+ s->last_flush_pos_ = 0;
764
+ s->last_processed_pos_ = 0;
765
+ s->prev_byte_ = 0;
766
+ s->prev_byte2_ = 0;
767
+ s->storage_size_ = 0;
768
+ s->storage_ = 0;
769
+ HasherInit(&s->hasher_);
770
+ s->large_table_ = NULL;
771
+ s->large_table_size_ = 0;
772
+ s->cmd_code_numbits_ = 0;
773
+ s->command_buf_ = NULL;
774
+ s->literal_buf_ = NULL;
775
+ s->next_out_ = NULL;
776
+ s->available_out_ = 0;
777
+ s->total_out_ = 0;
778
+ s->stream_state_ = BROTLI_STREAM_PROCESSING;
779
+ s->is_last_block_emitted_ = BROTLI_FALSE;
780
+ s->is_initialized_ = BROTLI_FALSE;
781
+
782
+ RingBufferInit(&s->ringbuffer_);
783
+
784
+ s->commands_ = 0;
785
+ s->cmd_alloc_size_ = 0;
786
+
787
+ /* Initialize distance cache. */
788
+ s->dist_cache_[0] = 4;
789
+ s->dist_cache_[1] = 11;
790
+ s->dist_cache_[2] = 15;
791
+ s->dist_cache_[3] = 16;
792
+ /* Save the state of the distance cache in case we need to restore it for
793
+ emitting an uncompressed block. */
794
+ memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
795
+ }
796
+
797
+ BrotliEncoderState* BrotliEncoderCreateInstance(
798
+ brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
799
+ BrotliEncoderState* state = 0;
800
+ if (!alloc_func && !free_func) {
801
+ state = (BrotliEncoderState*)malloc(sizeof(BrotliEncoderState));
802
+ } else if (alloc_func && free_func) {
803
+ state = (BrotliEncoderState*)alloc_func(opaque, sizeof(BrotliEncoderState));
804
+ }
805
+ if (state == 0) {
806
+ /* BROTLI_DUMP(); */
807
+ return 0;
808
+ }
809
+ BrotliInitMemoryManager(
810
+ &state->memory_manager_, alloc_func, free_func, opaque);
811
+ BrotliEncoderInitState(state);
812
+ return state;
813
+ }
814
+
815
+ static void BrotliEncoderCleanupState(BrotliEncoderState* s) {
816
+ MemoryManager* m = &s->memory_manager_;
817
+ if (BROTLI_IS_OOM(m)) {
818
+ BrotliWipeOutMemoryManager(m);
819
+ return;
820
+ }
821
+ BROTLI_FREE(m, s->storage_);
822
+ BROTLI_FREE(m, s->commands_);
823
+ RingBufferFree(m, &s->ringbuffer_);
824
+ DestroyHasher(m, &s->hasher_);
825
+ BROTLI_FREE(m, s->large_table_);
826
+ BROTLI_FREE(m, s->command_buf_);
827
+ BROTLI_FREE(m, s->literal_buf_);
828
+ }
829
+
830
+ /* Deinitializes and frees BrotliEncoderState instance. */
831
+ void BrotliEncoderDestroyInstance(BrotliEncoderState* state) {
832
+ if (!state) {
833
+ return;
834
+ } else {
835
+ MemoryManager* m = &state->memory_manager_;
836
+ brotli_free_func free_func = m->free_func;
837
+ void* opaque = m->opaque;
838
+ BrotliEncoderCleanupState(state);
839
+ free_func(opaque, state);
840
+ }
841
+ }
842
+
843
+ /*
844
+ Copies the given input data to the internal ring buffer of the compressor.
845
+ No processing of the data occurs at this time and this function can be
846
+ called multiple times before calling WriteBrotliData() to process the
847
+ accumulated input. At most input_block_size() bytes of input data can be
848
+ copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
849
+ */
850
+ static void CopyInputToRingBuffer(BrotliEncoderState* s,
851
+ const size_t input_size,
852
+ const uint8_t* input_buffer) {
853
+ RingBuffer* ringbuffer_ = &s->ringbuffer_;
854
+ MemoryManager* m = &s->memory_manager_;
855
+ RingBufferWrite(m, input_buffer, input_size, ringbuffer_);
856
+ if (BROTLI_IS_OOM(m)) return;
857
+ s->input_pos_ += input_size;
858
+
859
+ /* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
860
+ hashing not depend on uninitialized data. This makes compression
861
+ deterministic and it prevents uninitialized memory warnings in Valgrind.
862
+ Even without erasing, the output would be valid (but nondeterministic).
863
+
864
+ Background information: The compressor stores short (at most 8 bytes)
865
+ substrings of the input already read in a hash table, and detects
866
+ repetitions by looking up such substrings in the hash table. If it
867
+ can find a substring, it checks whether the substring is really there
868
+ in the ring buffer (or it's just a hash collision). Should the hash
869
+ table become corrupt, this check makes sure that the output is
870
+ still valid, albeit the compression ratio would be bad.
871
+
872
+ The compressor populates the hash table from the ring buffer as it's
873
+ reading new bytes from the input. However, at the last few indexes of
874
+ the ring buffer, there are not enough bytes to build full-length
875
+ substrings from. Since the hash table always contains full-length
876
+ substrings, we erase with dummy zeros here to make sure that those
877
+ substrings will contain zeros at the end instead of uninitialized
878
+ data.
879
+
880
+ Please note that erasing is not necessary (because the
881
+ memory region is already initialized since he ring buffer
882
+ has a `tail' that holds a copy of the beginning,) so we
883
+ skip erasing if we have already gone around at least once in
884
+ the ring buffer.
885
+
886
+ Only clear during the first round of ring-buffer writes. On
887
+ subsequent rounds data in the ring-buffer would be affected. */
888
+ if (ringbuffer_->pos_ <= ringbuffer_->mask_) {
889
+ /* This is the first time when the ring buffer is being written.
890
+ We clear 7 bytes just after the bytes that have been copied from
891
+ the input buffer.
892
+
893
+ The ring-buffer has a "tail" that holds a copy of the beginning,
894
+ but only once the ring buffer has been fully written once, i.e.,
895
+ pos <= mask. For the first time, we need to write values
896
+ in this tail (where index may be larger than mask), so that
897
+ we have exactly defined behavior and don't read uninitialized
898
+ memory. Due to performance reasons, hashing reads data using a
899
+ LOAD64, which can go 7 bytes beyond the bytes written in the
900
+ ring-buffer. */
901
+ memset(ringbuffer_->buffer_ + ringbuffer_->pos_, 0, 7);
902
+ }
903
+ }
904
+
905
+ /* Marks all input as processed.
906
+ Returns true if position wrapping occurs. */
907
+ static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) {
908
+ uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
909
+ uint32_t wrapped_input_pos = WrapPosition(s->input_pos_);
910
+ s->last_processed_pos_ = s->input_pos_;
911
+ return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos);
912
+ }
913
+
914
+ static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
915
+ uint32_t* wrapped_last_processed_pos) {
916
+ Command* last_command = &s->commands_[s->num_commands_ - 1];
917
+ const uint8_t* data = s->ringbuffer_.buffer_;
918
+ const uint32_t mask = s->ringbuffer_.mask_;
919
+ uint64_t max_backward_distance =
920
+ (((uint64_t)1) << s->params.lgwin) - BROTLI_WINDOW_GAP;
921
+ uint64_t last_copy_len = last_command->copy_len_ & 0x1FFFFFF;
922
+ uint64_t last_processed_pos = s->last_processed_pos_ - last_copy_len;
923
+ uint64_t max_distance = last_processed_pos < max_backward_distance ?
924
+ last_processed_pos : max_backward_distance;
925
+ uint64_t cmd_dist = (uint64_t)s->dist_cache_[0];
926
+ uint32_t distance_code = CommandRestoreDistanceCode(last_command,
927
+ &s->params.dist);
928
+ if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES ||
929
+ distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) {
930
+ if (cmd_dist <= max_distance) {
931
+ while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
932
+ data[(*wrapped_last_processed_pos - cmd_dist) & mask]) {
933
+ last_command->copy_len_++;
934
+ (*bytes)--;
935
+ (*wrapped_last_processed_pos)++;
936
+ }
937
+ } else {
938
+ }
939
+ /* The copy length is at most the metablock size, and thus expressible. */
940
+ GetLengthCode(last_command->insert_len_,
941
+ (size_t)((int)(last_command->copy_len_ & 0x1FFFFFF) +
942
+ (int)(last_command->copy_len_ >> 25)),
943
+ TO_BROTLI_BOOL((last_command->dist_prefix_ & 0x3FF) == 0),
944
+ &last_command->cmd_prefix_);
945
+ }
946
+ }
947
+
948
+ /*
949
+ Processes the accumulated input data and sets |*out_size| to the length of
950
+ the new output meta-block, or to zero if no new output meta-block has been
951
+ created (in this case the processed input data is buffered internally).
952
+ If |*out_size| is positive, |*output| points to the start of the output
953
+ data. If |is_last| or |force_flush| is BROTLI_TRUE, an output meta-block is
954
+ always created. However, until |is_last| is BROTLI_TRUE encoder may retain up
955
+ to 7 bits of the last byte of output. To force encoder to dump the remaining
956
+ bits use WriteMetadata() to append an empty meta-data block.
957
+ Returns BROTLI_FALSE if the size of the input data is larger than
958
+ input_block_size().
959
+ */
960
+ static BROTLI_BOOL EncodeData(
961
+ BrotliEncoderState* s, const BROTLI_BOOL is_last,
962
+ const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) {
963
+ const uint64_t delta = UnprocessedInputSize(s);
964
+ uint32_t bytes = (uint32_t)delta;
965
+ uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
966
+ uint8_t* data;
967
+ uint32_t mask;
968
+ MemoryManager* m = &s->memory_manager_;
969
+ ContextType literal_context_mode;
970
+ ContextLut literal_context_lut;
971
+
972
+ data = s->ringbuffer_.buffer_;
973
+ mask = s->ringbuffer_.mask_;
974
+
975
+ /* Adding more blocks after "last" block is forbidden. */
976
+ if (s->is_last_block_emitted_) return BROTLI_FALSE;
977
+ if (is_last) s->is_last_block_emitted_ = BROTLI_TRUE;
978
+
979
+ if (delta > InputBlockSize(s)) {
980
+ return BROTLI_FALSE;
981
+ }
982
+ if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY &&
983
+ !s->command_buf_) {
984
+ s->command_buf_ =
985
+ BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
986
+ s->literal_buf_ =
987
+ BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
988
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
989
+ BROTLI_IS_NULL(s->literal_buf_)) {
990
+ return BROTLI_FALSE;
991
+ }
992
+ }
993
+
994
+ if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
995
+ s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
996
+ uint8_t* storage;
997
+ size_t storage_ix = s->last_bytes_bits_;
998
+ size_t table_size;
999
+ int* table;
1000
+
1001
+ if (delta == 0 && !is_last) {
1002
+ /* We have no new input data and we don't have to finish the stream, so
1003
+ nothing to do. */
1004
+ *out_size = 0;
1005
+ return BROTLI_TRUE;
1006
+ }
1007
+ storage = GetBrotliStorage(s, 2 * bytes + 503);
1008
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1009
+ storage[0] = (uint8_t)s->last_bytes_;
1010
+ storage[1] = (uint8_t)(s->last_bytes_ >> 8);
1011
+ table = GetHashTable(s, s->params.quality, bytes, &table_size);
1012
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1013
+ if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
1014
+ BrotliCompressFragmentFast(
1015
+ m, &data[wrapped_last_processed_pos & mask],
1016
+ bytes, is_last,
1017
+ table, table_size,
1018
+ s->cmd_depths_, s->cmd_bits_,
1019
+ &s->cmd_code_numbits_, s->cmd_code_,
1020
+ &storage_ix, storage);
1021
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1022
+ } else {
1023
+ BrotliCompressFragmentTwoPass(
1024
+ m, &data[wrapped_last_processed_pos & mask],
1025
+ bytes, is_last,
1026
+ s->command_buf_, s->literal_buf_,
1027
+ table, table_size,
1028
+ &storage_ix, storage);
1029
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1030
+ }
1031
+ s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
1032
+ s->last_bytes_bits_ = storage_ix & 7u;
1033
+ UpdateLastProcessedPos(s);
1034
+ *output = &storage[0];
1035
+ *out_size = storage_ix >> 3;
1036
+ return BROTLI_TRUE;
1037
+ }
1038
+
1039
+ {
1040
+ /* Theoretical max number of commands is 1 per 2 bytes. */
1041
+ size_t newsize = s->num_commands_ + bytes / 2 + 1;
1042
+ if (newsize > s->cmd_alloc_size_) {
1043
+ Command* new_commands;
1044
+ /* Reserve a bit more memory to allow merging with a next block
1045
+ without reallocation: that would impact speed. */
1046
+ newsize += (bytes / 4) + 16;
1047
+ s->cmd_alloc_size_ = newsize;
1048
+ new_commands = BROTLI_ALLOC(m, Command, newsize);
1049
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_commands)) return BROTLI_FALSE;
1050
+ if (s->commands_) {
1051
+ memcpy(new_commands, s->commands_, sizeof(Command) * s->num_commands_);
1052
+ BROTLI_FREE(m, s->commands_);
1053
+ }
1054
+ s->commands_ = new_commands;
1055
+ }
1056
+ }
1057
+
1058
+ InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params,
1059
+ wrapped_last_processed_pos, bytes, is_last);
1060
+
1061
+ literal_context_mode = ChooseContextMode(
1062
+ &s->params, data, WrapPosition(s->last_flush_pos_),
1063
+ mask, (size_t)(s->input_pos_ - s->last_flush_pos_));
1064
+ literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
1065
+
1066
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1067
+
1068
+ if (s->num_commands_ && s->last_insert_len_ == 0) {
1069
+ ExtendLastCommand(s, &bytes, &wrapped_last_processed_pos);
1070
+ }
1071
+
1072
+ if (s->params.quality == ZOPFLIFICATION_QUALITY) {
1073
+ BROTLI_DCHECK(s->params.hasher.type == 10);
1074
+ BrotliCreateZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
1075
+ data, mask, literal_context_lut, &s->params,
1076
+ &s->hasher_, s->dist_cache_,
1077
+ &s->last_insert_len_, &s->commands_[s->num_commands_],
1078
+ &s->num_commands_, &s->num_literals_);
1079
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1080
+ } else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) {
1081
+ BROTLI_DCHECK(s->params.hasher.type == 10);
1082
+ BrotliCreateHqZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
1083
+ data, mask, literal_context_lut, &s->params,
1084
+ &s->hasher_, s->dist_cache_,
1085
+ &s->last_insert_len_, &s->commands_[s->num_commands_],
1086
+ &s->num_commands_, &s->num_literals_);
1087
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1088
+ } else {
1089
+ BrotliCreateBackwardReferences(bytes, wrapped_last_processed_pos,
1090
+ data, mask, literal_context_lut, &s->params,
1091
+ &s->hasher_, s->dist_cache_,
1092
+ &s->last_insert_len_, &s->commands_[s->num_commands_],
1093
+ &s->num_commands_, &s->num_literals_);
1094
+ }
1095
+
1096
+ {
1097
+ const size_t max_length = MaxMetablockSize(&s->params);
1098
+ const size_t max_literals = max_length / 8;
1099
+ const size_t max_commands = max_length / 8;
1100
+ const size_t processed_bytes = (size_t)(s->input_pos_ - s->last_flush_pos_);
1101
+ /* If maximal possible additional block doesn't fit metablock, flush now. */
1102
+ /* TODO: Postpone decision until next block arrives? */
1103
+ const BROTLI_BOOL next_input_fits_metablock = TO_BROTLI_BOOL(
1104
+ processed_bytes + InputBlockSize(s) <= max_length);
1105
+ /* If block splitting is not used, then flush as soon as there is some
1106
+ amount of commands / literals produced. */
1107
+ const BROTLI_BOOL should_flush = TO_BROTLI_BOOL(
1108
+ s->params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT &&
1109
+ s->num_literals_ + s->num_commands_ >= MAX_NUM_DELAYED_SYMBOLS);
1110
+ if (!is_last && !force_flush && !should_flush &&
1111
+ next_input_fits_metablock &&
1112
+ s->num_literals_ < max_literals &&
1113
+ s->num_commands_ < max_commands) {
1114
+ /* Merge with next input block. Everything will happen later. */
1115
+ if (UpdateLastProcessedPos(s)) {
1116
+ HasherReset(&s->hasher_);
1117
+ }
1118
+ *out_size = 0;
1119
+ return BROTLI_TRUE;
1120
+ }
1121
+ }
1122
+
1123
+ /* Create the last insert-only command. */
1124
+ if (s->last_insert_len_ > 0) {
1125
+ InitInsertCommand(&s->commands_[s->num_commands_++], s->last_insert_len_);
1126
+ s->num_literals_ += s->last_insert_len_;
1127
+ s->last_insert_len_ = 0;
1128
+ }
1129
+
1130
+ if (!is_last && s->input_pos_ == s->last_flush_pos_) {
1131
+ /* We have no new input data and we don't have to finish the stream, so
1132
+ nothing to do. */
1133
+ *out_size = 0;
1134
+ return BROTLI_TRUE;
1135
+ }
1136
+ BROTLI_DCHECK(s->input_pos_ >= s->last_flush_pos_);
1137
+ BROTLI_DCHECK(s->input_pos_ > s->last_flush_pos_ || is_last);
1138
+ BROTLI_DCHECK(s->input_pos_ - s->last_flush_pos_ <= 1u << 24);
1139
+ {
1140
+ const uint32_t metablock_size =
1141
+ (uint32_t)(s->input_pos_ - s->last_flush_pos_);
1142
+ uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 503);
1143
+ size_t storage_ix = s->last_bytes_bits_;
1144
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1145
+ storage[0] = (uint8_t)s->last_bytes_;
1146
+ storage[1] = (uint8_t)(s->last_bytes_ >> 8);
1147
+ WriteMetaBlockInternal(
1148
+ m, data, mask, s->last_flush_pos_, metablock_size, is_last,
1149
+ literal_context_mode, &s->params, s->prev_byte_, s->prev_byte2_,
1150
+ s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_,
1151
+ s->dist_cache_, &storage_ix, storage);
1152
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1153
+ s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
1154
+ s->last_bytes_bits_ = storage_ix & 7u;
1155
+ s->last_flush_pos_ = s->input_pos_;
1156
+ if (UpdateLastProcessedPos(s)) {
1157
+ HasherReset(&s->hasher_);
1158
+ }
1159
+ if (s->last_flush_pos_ > 0) {
1160
+ s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask];
1161
+ }
1162
+ if (s->last_flush_pos_ > 1) {
1163
+ s->prev_byte2_ = data[(uint32_t)(s->last_flush_pos_ - 2) & mask];
1164
+ }
1165
+ s->num_commands_ = 0;
1166
+ s->num_literals_ = 0;
1167
+ /* Save the state of the distance cache in case we need to restore it for
1168
+ emitting an uncompressed block. */
1169
+ memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
1170
+ *output = &storage[0];
1171
+ *out_size = storage_ix >> 3;
1172
+ return BROTLI_TRUE;
1173
+ }
1174
+ }
1175
+
1176
+ /* Dumps remaining output bits and metadata header to |header|.
1177
+ Returns number of produced bytes.
1178
+ REQUIRED: |header| should be 8-byte aligned and at least 16 bytes long.
1179
+ REQUIRED: |block_size| <= (1 << 24). */
1180
+ static size_t WriteMetadataHeader(
1181
+ BrotliEncoderState* s, const size_t block_size, uint8_t* header) {
1182
+ size_t storage_ix;
1183
+ storage_ix = s->last_bytes_bits_;
1184
+ header[0] = (uint8_t)s->last_bytes_;
1185
+ header[1] = (uint8_t)(s->last_bytes_ >> 8);
1186
+ s->last_bytes_ = 0;
1187
+ s->last_bytes_bits_ = 0;
1188
+
1189
+ BrotliWriteBits(1, 0, &storage_ix, header);
1190
+ BrotliWriteBits(2, 3, &storage_ix, header);
1191
+ BrotliWriteBits(1, 0, &storage_ix, header);
1192
+ if (block_size == 0) {
1193
+ BrotliWriteBits(2, 0, &storage_ix, header);
1194
+ } else {
1195
+ uint32_t nbits = (block_size == 1) ? 0 :
1196
+ (Log2FloorNonZero((uint32_t)block_size - 1) + 1);
1197
+ uint32_t nbytes = (nbits + 7) / 8;
1198
+ BrotliWriteBits(2, nbytes, &storage_ix, header);
1199
+ BrotliWriteBits(8 * nbytes, block_size - 1, &storage_ix, header);
1200
+ }
1201
+ return (storage_ix + 7u) >> 3;
1202
+ }
1203
+
1204
+ static BROTLI_BOOL BrotliCompressBufferQuality10(
1205
+ int lgwin, size_t input_size, const uint8_t* input_buffer,
1206
+ size_t* encoded_size, uint8_t* encoded_buffer) {
1207
+ MemoryManager memory_manager;
1208
+ MemoryManager* m = &memory_manager;
1209
+
1210
+ const size_t mask = BROTLI_SIZE_MAX >> 1;
1211
+ int dist_cache[4] = { 4, 11, 15, 16 };
1212
+ int saved_dist_cache[4] = { 4, 11, 15, 16 };
1213
+ BROTLI_BOOL ok = BROTLI_TRUE;
1214
+ const size_t max_out_size = *encoded_size;
1215
+ size_t total_out_size = 0;
1216
+ uint16_t last_bytes;
1217
+ uint8_t last_bytes_bits;
1218
+
1219
+ const size_t hasher_eff_size = BROTLI_MIN(size_t,
1220
+ input_size, BROTLI_MAX_BACKWARD_LIMIT(lgwin) + BROTLI_WINDOW_GAP);
1221
+
1222
+ BrotliEncoderParams params;
1223
+
1224
+ const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1);
1225
+ size_t max_block_size;
1226
+ const size_t max_metablock_size = (size_t)1 << lgmetablock;
1227
+ const size_t max_literals_per_metablock = max_metablock_size / 8;
1228
+ const size_t max_commands_per_metablock = max_metablock_size / 8;
1229
+ size_t metablock_start = 0;
1230
+ uint8_t prev_byte = 0;
1231
+ uint8_t prev_byte2 = 0;
1232
+
1233
+ Hasher hasher;
1234
+ HasherInit(&hasher);
1235
+
1236
+ BrotliEncoderInitParams(&params);
1237
+ params.quality = 10;
1238
+ params.lgwin = lgwin;
1239
+ if (lgwin > BROTLI_MAX_WINDOW_BITS) {
1240
+ params.large_window = BROTLI_TRUE;
1241
+ }
1242
+ SanitizeParams(&params);
1243
+ params.lgblock = ComputeLgBlock(&params);
1244
+ ChooseDistanceParams(&params);
1245
+ max_block_size = (size_t)1 << params.lgblock;
1246
+
1247
+ BrotliInitMemoryManager(m, 0, 0, 0);
1248
+
1249
+ BROTLI_DCHECK(input_size <= mask + 1);
1250
+ EncodeWindowBits(lgwin, params.large_window, &last_bytes, &last_bytes_bits);
1251
+ InitOrStitchToPreviousBlock(m, &hasher, input_buffer, mask, &params,
1252
+ 0, hasher_eff_size, BROTLI_TRUE);
1253
+ if (BROTLI_IS_OOM(m)) goto oom;
1254
+
1255
+ while (ok && metablock_start < input_size) {
1256
+ const size_t metablock_end =
1257
+ BROTLI_MIN(size_t, input_size, metablock_start + max_metablock_size);
1258
+ const size_t expected_num_commands =
1259
+ (metablock_end - metablock_start) / 12 + 16;
1260
+ Command* commands = 0;
1261
+ size_t num_commands = 0;
1262
+ size_t last_insert_len = 0;
1263
+ size_t num_literals = 0;
1264
+ size_t metablock_size = 0;
1265
+ size_t cmd_alloc_size = 0;
1266
+ BROTLI_BOOL is_last;
1267
+ uint8_t* storage;
1268
+ size_t storage_ix;
1269
+
1270
+ ContextType literal_context_mode = ChooseContextMode(&params,
1271
+ input_buffer, metablock_start, mask, metablock_end - metablock_start);
1272
+ ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
1273
+
1274
+ size_t block_start;
1275
+ for (block_start = metablock_start; block_start < metablock_end; ) {
1276
+ size_t block_size =
1277
+ BROTLI_MIN(size_t, metablock_end - block_start, max_block_size);
1278
+ ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, block_size + 1);
1279
+ size_t path_size;
1280
+ size_t new_cmd_alloc_size;
1281
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) goto oom;
1282
+ BrotliInitZopfliNodes(nodes, block_size + 1);
1283
+ StitchToPreviousBlockH10(&hasher.privat._H10, block_size, block_start,
1284
+ input_buffer, mask);
1285
+ path_size = BrotliZopfliComputeShortestPath(m, block_size, block_start,
1286
+ input_buffer, mask, literal_context_lut, &params, dist_cache, &hasher,
1287
+ nodes);
1288
+ if (BROTLI_IS_OOM(m)) goto oom;
1289
+ /* We allocate a command buffer in the first iteration of this loop that
1290
+ will be likely big enough for the whole metablock, so that for most
1291
+ inputs we will not have to reallocate in later iterations. We do the
1292
+ allocation here and not before the loop, because if the input is small,
1293
+ this will be allocated after the Zopfli cost model is freed, so this
1294
+ will not increase peak memory usage.
1295
+ TODO: If the first allocation is too small, increase command
1296
+ buffer size exponentially. */
1297
+ new_cmd_alloc_size = BROTLI_MAX(size_t, expected_num_commands,
1298
+ num_commands + path_size + 1);
1299
+ if (cmd_alloc_size != new_cmd_alloc_size) {
1300
+ Command* new_commands = BROTLI_ALLOC(m, Command, new_cmd_alloc_size);
1301
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_commands)) goto oom;
1302
+ cmd_alloc_size = new_cmd_alloc_size;
1303
+ if (commands) {
1304
+ memcpy(new_commands, commands, sizeof(Command) * num_commands);
1305
+ BROTLI_FREE(m, commands);
1306
+ }
1307
+ commands = new_commands;
1308
+ }
1309
+ BrotliZopfliCreateCommands(block_size, block_start, &nodes[0], dist_cache,
1310
+ &last_insert_len, &params, &commands[num_commands], &num_literals);
1311
+ num_commands += path_size;
1312
+ block_start += block_size;
1313
+ metablock_size += block_size;
1314
+ BROTLI_FREE(m, nodes);
1315
+ if (num_literals > max_literals_per_metablock ||
1316
+ num_commands > max_commands_per_metablock) {
1317
+ break;
1318
+ }
1319
+ }
1320
+
1321
+ if (last_insert_len > 0) {
1322
+ InitInsertCommand(&commands[num_commands++], last_insert_len);
1323
+ num_literals += last_insert_len;
1324
+ }
1325
+
1326
+ is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size);
1327
+ storage = NULL;
1328
+ storage_ix = last_bytes_bits;
1329
+
1330
+ if (metablock_size == 0) {
1331
+ /* Write the ISLAST and ISEMPTY bits. */
1332
+ storage = BROTLI_ALLOC(m, uint8_t, 16);
1333
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
1334
+ storage[0] = (uint8_t)last_bytes;
1335
+ storage[1] = (uint8_t)(last_bytes >> 8);
1336
+ BrotliWriteBits(2, 3, &storage_ix, storage);
1337
+ storage_ix = (storage_ix + 7u) & ~7u;
1338
+ } else if (!ShouldCompress(input_buffer, mask, metablock_start,
1339
+ metablock_size, num_literals, num_commands)) {
1340
+ /* Restore the distance cache, as its last update by
1341
+ CreateBackwardReferences is now unused. */
1342
+ memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
1343
+ storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16);
1344
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
1345
+ storage[0] = (uint8_t)last_bytes;
1346
+ storage[1] = (uint8_t)(last_bytes >> 8);
1347
+ BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
1348
+ metablock_start, mask, metablock_size,
1349
+ &storage_ix, storage);
1350
+ } else {
1351
+ MetaBlockSplit mb;
1352
+ BrotliEncoderParams block_params = params;
1353
+ InitMetaBlockSplit(&mb);
1354
+ BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask,
1355
+ &block_params,
1356
+ prev_byte, prev_byte2,
1357
+ commands, num_commands,
1358
+ literal_context_mode,
1359
+ &mb);
1360
+ if (BROTLI_IS_OOM(m)) goto oom;
1361
+ {
1362
+ /* The number of distance symbols effectively used for distance
1363
+ histograms. It might be less than distance alphabet size
1364
+ for "Large Window Brotli" (32-bit). */
1365
+ BrotliOptimizeHistograms(block_params.dist.alphabet_size_limit, &mb);
1366
+ }
1367
+ storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 503);
1368
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
1369
+ storage[0] = (uint8_t)last_bytes;
1370
+ storage[1] = (uint8_t)(last_bytes >> 8);
1371
+ BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size,
1372
+ mask, prev_byte, prev_byte2,
1373
+ is_last,
1374
+ &block_params,
1375
+ literal_context_mode,
1376
+ commands, num_commands,
1377
+ &mb,
1378
+ &storage_ix, storage);
1379
+ if (BROTLI_IS_OOM(m)) goto oom;
1380
+ if (metablock_size + 4 < (storage_ix >> 3)) {
1381
+ /* Restore the distance cache and last byte. */
1382
+ memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
1383
+ storage[0] = (uint8_t)last_bytes;
1384
+ storage[1] = (uint8_t)(last_bytes >> 8);
1385
+ storage_ix = last_bytes_bits;
1386
+ BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
1387
+ metablock_start, mask,
1388
+ metablock_size, &storage_ix, storage);
1389
+ }
1390
+ DestroyMetaBlockSplit(m, &mb);
1391
+ }
1392
+ last_bytes = (uint16_t)(storage[storage_ix >> 3]);
1393
+ last_bytes_bits = storage_ix & 7u;
1394
+ metablock_start += metablock_size;
1395
+ if (metablock_start < input_size) {
1396
+ prev_byte = input_buffer[metablock_start - 1];
1397
+ prev_byte2 = input_buffer[metablock_start - 2];
1398
+ }
1399
+ /* Save the state of the distance cache in case we need to restore it for
1400
+ emitting an uncompressed block. */
1401
+ memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
1402
+
1403
+ {
1404
+ const size_t out_size = storage_ix >> 3;
1405
+ total_out_size += out_size;
1406
+ if (total_out_size <= max_out_size) {
1407
+ memcpy(encoded_buffer, storage, out_size);
1408
+ encoded_buffer += out_size;
1409
+ } else {
1410
+ ok = BROTLI_FALSE;
1411
+ }
1412
+ }
1413
+ BROTLI_FREE(m, storage);
1414
+ BROTLI_FREE(m, commands);
1415
+ }
1416
+
1417
+ *encoded_size = total_out_size;
1418
+ DestroyHasher(m, &hasher);
1419
+ return ok;
1420
+
1421
+ oom:
1422
+ BrotliWipeOutMemoryManager(m);
1423
+ return BROTLI_FALSE;
1424
+ }
1425
+
1426
+ size_t BrotliEncoderMaxCompressedSize(size_t input_size) {
1427
+ /* [window bits / empty metadata] + N * [uncompressed] + [last empty] */
1428
+ size_t num_large_blocks = input_size >> 14;
1429
+ size_t overhead = 2 + (4 * num_large_blocks) + 3 + 1;
1430
+ size_t result = input_size + overhead;
1431
+ if (input_size == 0) return 2;
1432
+ return (result < input_size) ? 0 : result;
1433
+ }
1434
+
1435
+ /* Wraps data to uncompressed brotli stream with minimal window size.
1436
+ |output| should point at region with at least BrotliEncoderMaxCompressedSize
1437
+ addressable bytes.
1438
+ Returns the length of stream. */
1439
+ static size_t MakeUncompressedStream(
1440
+ const uint8_t* input, size_t input_size, uint8_t* output) {
1441
+ size_t size = input_size;
1442
+ size_t result = 0;
1443
+ size_t offset = 0;
1444
+ if (input_size == 0) {
1445
+ output[0] = 6;
1446
+ return 1;
1447
+ }
1448
+ output[result++] = 0x21; /* window bits = 10, is_last = false */
1449
+ output[result++] = 0x03; /* empty metadata, padding */
1450
+ while (size > 0) {
1451
+ uint32_t nibbles = 0;
1452
+ uint32_t chunk_size;
1453
+ uint32_t bits;
1454
+ chunk_size = (size > (1u << 24)) ? (1u << 24) : (uint32_t)size;
1455
+ if (chunk_size > (1u << 16)) nibbles = (chunk_size > (1u << 20)) ? 2 : 1;
1456
+ bits =
1457
+ (nibbles << 1) | ((chunk_size - 1) << 3) | (1u << (19 + 4 * nibbles));
1458
+ output[result++] = (uint8_t)bits;
1459
+ output[result++] = (uint8_t)(bits >> 8);
1460
+ output[result++] = (uint8_t)(bits >> 16);
1461
+ if (nibbles == 2) output[result++] = (uint8_t)(bits >> 24);
1462
+ memcpy(&output[result], &input[offset], chunk_size);
1463
+ result += chunk_size;
1464
+ offset += chunk_size;
1465
+ size -= chunk_size;
1466
+ }
1467
+ output[result++] = 3;
1468
+ return result;
1469
+ }
1470
+
1471
+ BROTLI_BOOL BrotliEncoderCompress(
1472
+ int quality, int lgwin, BrotliEncoderMode mode, size_t input_size,
1473
+ const uint8_t* input_buffer, size_t* encoded_size,
1474
+ uint8_t* encoded_buffer) {
1475
+ BrotliEncoderState* s;
1476
+ size_t out_size = *encoded_size;
1477
+ const uint8_t* input_start = input_buffer;
1478
+ uint8_t* output_start = encoded_buffer;
1479
+ size_t max_out_size = BrotliEncoderMaxCompressedSize(input_size);
1480
+ if (out_size == 0) {
1481
+ /* Output buffer needs at least one byte. */
1482
+ return BROTLI_FALSE;
1483
+ }
1484
+ if (input_size == 0) {
1485
+ /* Handle the special case of empty input. */
1486
+ *encoded_size = 1;
1487
+ *encoded_buffer = 6;
1488
+ return BROTLI_TRUE;
1489
+ }
1490
+ if (quality == 10) {
1491
+ /* TODO: Implement this direct path for all quality levels. */
1492
+ const int lg_win = BROTLI_MIN(int, BROTLI_LARGE_MAX_WINDOW_BITS,
1493
+ BROTLI_MAX(int, 16, lgwin));
1494
+ int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer,
1495
+ encoded_size, encoded_buffer);
1496
+ if (!ok || (max_out_size && *encoded_size > max_out_size)) {
1497
+ goto fallback;
1498
+ }
1499
+ return BROTLI_TRUE;
1500
+ }
1501
+
1502
+ s = BrotliEncoderCreateInstance(0, 0, 0);
1503
+ if (!s) {
1504
+ return BROTLI_FALSE;
1505
+ } else {
1506
+ size_t available_in = input_size;
1507
+ const uint8_t* next_in = input_buffer;
1508
+ size_t available_out = *encoded_size;
1509
+ uint8_t* next_out = encoded_buffer;
1510
+ size_t total_out = 0;
1511
+ BROTLI_BOOL result = BROTLI_FALSE;
1512
+ BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);
1513
+ BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
1514
+ BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode);
1515
+ BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size);
1516
+ if (lgwin > BROTLI_MAX_WINDOW_BITS) {
1517
+ BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, BROTLI_TRUE);
1518
+ }
1519
+ result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,
1520
+ &available_in, &next_in, &available_out, &next_out, &total_out);
1521
+ if (!BrotliEncoderIsFinished(s)) result = 0;
1522
+ *encoded_size = total_out;
1523
+ BrotliEncoderDestroyInstance(s);
1524
+ if (!result || (max_out_size && *encoded_size > max_out_size)) {
1525
+ goto fallback;
1526
+ }
1527
+ return BROTLI_TRUE;
1528
+ }
1529
+ fallback:
1530
+ *encoded_size = 0;
1531
+ if (!max_out_size) return BROTLI_FALSE;
1532
+ if (out_size >= max_out_size) {
1533
+ *encoded_size =
1534
+ MakeUncompressedStream(input_start, input_size, output_start);
1535
+ return BROTLI_TRUE;
1536
+ }
1537
+ return BROTLI_FALSE;
1538
+ }
1539
+
1540
+ static void InjectBytePaddingBlock(BrotliEncoderState* s) {
1541
+ uint32_t seal = s->last_bytes_;
1542
+ size_t seal_bits = s->last_bytes_bits_;
1543
+ uint8_t* destination;
1544
+ s->last_bytes_ = 0;
1545
+ s->last_bytes_bits_ = 0;
1546
+ /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */
1547
+ seal |= 0x6u << seal_bits;
1548
+ seal_bits += 6;
1549
+ /* If we have already created storage, then append to it.
1550
+ Storage is valid until next block is being compressed. */
1551
+ if (s->next_out_) {
1552
+ destination = s->next_out_ + s->available_out_;
1553
+ } else {
1554
+ destination = s->tiny_buf_.u8;
1555
+ s->next_out_ = destination;
1556
+ }
1557
+ destination[0] = (uint8_t)seal;
1558
+ if (seal_bits > 8) destination[1] = (uint8_t)(seal >> 8);
1559
+ if (seal_bits > 16) destination[2] = (uint8_t)(seal >> 16);
1560
+ s->available_out_ += (seal_bits + 7) >> 3;
1561
+ }
1562
+
1563
+ /* Injects padding bits or pushes compressed data to output.
1564
+ Returns false if nothing is done. */
1565
+ static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s,
1566
+ size_t* available_out, uint8_t** next_out, size_t* total_out) {
1567
+ if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
1568
+ s->last_bytes_bits_ != 0) {
1569
+ InjectBytePaddingBlock(s);
1570
+ return BROTLI_TRUE;
1571
+ }
1572
+
1573
+ if (s->available_out_ != 0 && *available_out != 0) {
1574
+ size_t copy_output_size =
1575
+ BROTLI_MIN(size_t, s->available_out_, *available_out);
1576
+ memcpy(*next_out, s->next_out_, copy_output_size);
1577
+ *next_out += copy_output_size;
1578
+ *available_out -= copy_output_size;
1579
+ s->next_out_ += copy_output_size;
1580
+ s->available_out_ -= copy_output_size;
1581
+ s->total_out_ += copy_output_size;
1582
+ if (total_out) *total_out = s->total_out_;
1583
+ return BROTLI_TRUE;
1584
+ }
1585
+
1586
+ return BROTLI_FALSE;
1587
+ }
1588
+
1589
+ static void CheckFlushComplete(BrotliEncoderState* s) {
1590
+ if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
1591
+ s->available_out_ == 0) {
1592
+ s->stream_state_ = BROTLI_STREAM_PROCESSING;
1593
+ s->next_out_ = 0;
1594
+ }
1595
+ }
1596
+
1597
+ static BROTLI_BOOL BrotliEncoderCompressStreamFast(
1598
+ BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
1599
+ const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
1600
+ size_t* total_out) {
1601
+ const size_t block_size_limit = (size_t)1 << s->params.lgwin;
1602
+ const size_t buf_size = BROTLI_MIN(size_t, kCompressFragmentTwoPassBlockSize,
1603
+ BROTLI_MIN(size_t, *available_in, block_size_limit));
1604
+ uint32_t* tmp_command_buf = NULL;
1605
+ uint32_t* command_buf = NULL;
1606
+ uint8_t* tmp_literal_buf = NULL;
1607
+ uint8_t* literal_buf = NULL;
1608
+ MemoryManager* m = &s->memory_manager_;
1609
+ if (s->params.quality != FAST_ONE_PASS_COMPRESSION_QUALITY &&
1610
+ s->params.quality != FAST_TWO_PASS_COMPRESSION_QUALITY) {
1611
+ return BROTLI_FALSE;
1612
+ }
1613
+ if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
1614
+ if (!s->command_buf_ && buf_size == kCompressFragmentTwoPassBlockSize) {
1615
+ s->command_buf_ =
1616
+ BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
1617
+ s->literal_buf_ =
1618
+ BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
1619
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
1620
+ BROTLI_IS_NULL(s->literal_buf_)) {
1621
+ return BROTLI_FALSE;
1622
+ }
1623
+ }
1624
+ if (s->command_buf_) {
1625
+ command_buf = s->command_buf_;
1626
+ literal_buf = s->literal_buf_;
1627
+ } else {
1628
+ tmp_command_buf = BROTLI_ALLOC(m, uint32_t, buf_size);
1629
+ tmp_literal_buf = BROTLI_ALLOC(m, uint8_t, buf_size);
1630
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tmp_command_buf) ||
1631
+ BROTLI_IS_NULL(tmp_literal_buf)) {
1632
+ return BROTLI_FALSE;
1633
+ }
1634
+ command_buf = tmp_command_buf;
1635
+ literal_buf = tmp_literal_buf;
1636
+ }
1637
+ }
1638
+
1639
+ while (BROTLI_TRUE) {
1640
+ if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
1641
+ continue;
1642
+ }
1643
+
1644
+ /* Compress block only when internal output buffer is empty, stream is not
1645
+ finished, there is no pending flush request, and there is either
1646
+ additional input or pending operation. */
1647
+ if (s->available_out_ == 0 &&
1648
+ s->stream_state_ == BROTLI_STREAM_PROCESSING &&
1649
+ (*available_in != 0 || op != BROTLI_OPERATION_PROCESS)) {
1650
+ size_t block_size = BROTLI_MIN(size_t, block_size_limit, *available_in);
1651
+ BROTLI_BOOL is_last =
1652
+ (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH);
1653
+ BROTLI_BOOL force_flush =
1654
+ (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH);
1655
+ size_t max_out_size = 2 * block_size + 503;
1656
+ BROTLI_BOOL inplace = BROTLI_TRUE;
1657
+ uint8_t* storage = NULL;
1658
+ size_t storage_ix = s->last_bytes_bits_;
1659
+ size_t table_size;
1660
+ int* table;
1661
+
1662
+ if (force_flush && block_size == 0) {
1663
+ s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
1664
+ continue;
1665
+ }
1666
+ if (max_out_size <= *available_out) {
1667
+ storage = *next_out;
1668
+ } else {
1669
+ inplace = BROTLI_FALSE;
1670
+ storage = GetBrotliStorage(s, max_out_size);
1671
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1672
+ }
1673
+ storage[0] = (uint8_t)s->last_bytes_;
1674
+ storage[1] = (uint8_t)(s->last_bytes_ >> 8);
1675
+ table = GetHashTable(s, s->params.quality, block_size, &table_size);
1676
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1677
+
1678
+ if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
1679
+ BrotliCompressFragmentFast(m, *next_in, block_size, is_last, table,
1680
+ table_size, s->cmd_depths_, s->cmd_bits_, &s->cmd_code_numbits_,
1681
+ s->cmd_code_, &storage_ix, storage);
1682
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1683
+ } else {
1684
+ BrotliCompressFragmentTwoPass(m, *next_in, block_size, is_last,
1685
+ command_buf, literal_buf, table, table_size,
1686
+ &storage_ix, storage);
1687
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1688
+ }
1689
+ if (block_size != 0) {
1690
+ *next_in += block_size;
1691
+ *available_in -= block_size;
1692
+ }
1693
+ if (inplace) {
1694
+ size_t out_bytes = storage_ix >> 3;
1695
+ BROTLI_DCHECK(out_bytes <= *available_out);
1696
+ BROTLI_DCHECK((storage_ix & 7) == 0 || out_bytes < *available_out);
1697
+ *next_out += out_bytes;
1698
+ *available_out -= out_bytes;
1699
+ s->total_out_ += out_bytes;
1700
+ if (total_out) *total_out = s->total_out_;
1701
+ } else {
1702
+ size_t out_bytes = storage_ix >> 3;
1703
+ s->next_out_ = storage;
1704
+ s->available_out_ = out_bytes;
1705
+ }
1706
+ s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
1707
+ s->last_bytes_bits_ = storage_ix & 7u;
1708
+
1709
+ if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
1710
+ if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
1711
+ continue;
1712
+ }
1713
+ break;
1714
+ }
1715
+ BROTLI_FREE(m, tmp_command_buf);
1716
+ BROTLI_FREE(m, tmp_literal_buf);
1717
+ CheckFlushComplete(s);
1718
+ return BROTLI_TRUE;
1719
+ }
1720
+
1721
+ static BROTLI_BOOL ProcessMetadata(
1722
+ BrotliEncoderState* s, size_t* available_in, const uint8_t** next_in,
1723
+ size_t* available_out, uint8_t** next_out, size_t* total_out) {
1724
+ if (*available_in > (1u << 24)) return BROTLI_FALSE;
1725
+ /* Switch to metadata block workflow, if required. */
1726
+ if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
1727
+ s->remaining_metadata_bytes_ = (uint32_t)*available_in;
1728
+ s->stream_state_ = BROTLI_STREAM_METADATA_HEAD;
1729
+ }
1730
+ if (s->stream_state_ != BROTLI_STREAM_METADATA_HEAD &&
1731
+ s->stream_state_ != BROTLI_STREAM_METADATA_BODY) {
1732
+ return BROTLI_FALSE;
1733
+ }
1734
+
1735
+ while (BROTLI_TRUE) {
1736
+ if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
1737
+ continue;
1738
+ }
1739
+ if (s->available_out_ != 0) break;
1740
+
1741
+ if (s->input_pos_ != s->last_flush_pos_) {
1742
+ BROTLI_BOOL result = EncodeData(s, BROTLI_FALSE, BROTLI_TRUE,
1743
+ &s->available_out_, &s->next_out_);
1744
+ if (!result) return BROTLI_FALSE;
1745
+ continue;
1746
+ }
1747
+
1748
+ if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD) {
1749
+ s->next_out_ = s->tiny_buf_.u8;
1750
+ s->available_out_ =
1751
+ WriteMetadataHeader(s, s->remaining_metadata_bytes_, s->next_out_);
1752
+ s->stream_state_ = BROTLI_STREAM_METADATA_BODY;
1753
+ continue;
1754
+ } else {
1755
+ /* Exit workflow only when there is no more input and no more output.
1756
+ Otherwise client may continue producing empty metadata blocks. */
1757
+ if (s->remaining_metadata_bytes_ == 0) {
1758
+ s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
1759
+ s->stream_state_ = BROTLI_STREAM_PROCESSING;
1760
+ break;
1761
+ }
1762
+ if (*available_out) {
1763
+ /* Directly copy input to output. */
1764
+ uint32_t copy = (uint32_t)BROTLI_MIN(
1765
+ size_t, s->remaining_metadata_bytes_, *available_out);
1766
+ memcpy(*next_out, *next_in, copy);
1767
+ *next_in += copy;
1768
+ *available_in -= copy;
1769
+ s->remaining_metadata_bytes_ -= copy;
1770
+ *next_out += copy;
1771
+ *available_out -= copy;
1772
+ } else {
1773
+ /* This guarantees progress in "TakeOutput" workflow. */
1774
+ uint32_t copy = BROTLI_MIN(uint32_t, s->remaining_metadata_bytes_, 16);
1775
+ s->next_out_ = s->tiny_buf_.u8;
1776
+ memcpy(s->next_out_, *next_in, copy);
1777
+ *next_in += copy;
1778
+ *available_in -= copy;
1779
+ s->remaining_metadata_bytes_ -= copy;
1780
+ s->available_out_ = copy;
1781
+ }
1782
+ continue;
1783
+ }
1784
+ }
1785
+
1786
+ return BROTLI_TRUE;
1787
+ }
1788
+
1789
+ static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) {
1790
+ if (s->params.size_hint == 0) {
1791
+ uint64_t delta = UnprocessedInputSize(s);
1792
+ uint64_t tail = available_in;
1793
+ uint32_t limit = 1u << 30;
1794
+ uint32_t total;
1795
+ if ((delta >= limit) || (tail >= limit) || ((delta + tail) >= limit)) {
1796
+ total = limit;
1797
+ } else {
1798
+ total = (uint32_t)(delta + tail);
1799
+ }
1800
+ s->params.size_hint = total;
1801
+ }
1802
+ }
1803
+
1804
+ BROTLI_BOOL BrotliEncoderCompressStream(
1805
+ BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
1806
+ const uint8_t** next_in, size_t* available_out,uint8_t** next_out,
1807
+ size_t* total_out) {
1808
+ if (!EnsureInitialized(s)) return BROTLI_FALSE;
1809
+
1810
+ /* Unfinished metadata block; check requirements. */
1811
+ if (s->remaining_metadata_bytes_ != BROTLI_UINT32_MAX) {
1812
+ if (*available_in != s->remaining_metadata_bytes_) return BROTLI_FALSE;
1813
+ if (op != BROTLI_OPERATION_EMIT_METADATA) return BROTLI_FALSE;
1814
+ }
1815
+
1816
+ if (op == BROTLI_OPERATION_EMIT_METADATA) {
1817
+ UpdateSizeHint(s, 0); /* First data metablock might be emitted here. */
1818
+ return ProcessMetadata(
1819
+ s, available_in, next_in, available_out, next_out, total_out);
1820
+ }
1821
+
1822
+ if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD ||
1823
+ s->stream_state_ == BROTLI_STREAM_METADATA_BODY) {
1824
+ return BROTLI_FALSE;
1825
+ }
1826
+
1827
+ if (s->stream_state_ != BROTLI_STREAM_PROCESSING && *available_in != 0) {
1828
+ return BROTLI_FALSE;
1829
+ }
1830
+ if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
1831
+ s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
1832
+ return BrotliEncoderCompressStreamFast(s, op, available_in, next_in,
1833
+ available_out, next_out, total_out);
1834
+ }
1835
+ while (BROTLI_TRUE) {
1836
+ size_t remaining_block_size = RemainingInputBlockSize(s);
1837
+ /* Shorten input to flint size. */
1838
+ if (s->flint_ >= 0 && remaining_block_size > (size_t)s->flint_) {
1839
+ remaining_block_size = (size_t)s->flint_;
1840
+ }
1841
+
1842
+ if (remaining_block_size != 0 && *available_in != 0) {
1843
+ size_t copy_input_size =
1844
+ BROTLI_MIN(size_t, remaining_block_size, *available_in);
1845
+ CopyInputToRingBuffer(s, copy_input_size, *next_in);
1846
+ *next_in += copy_input_size;
1847
+ *available_in -= copy_input_size;
1848
+ if (s->flint_ > 0) s->flint_ = (int8_t)(s->flint_ - (int)copy_input_size);
1849
+ continue;
1850
+ }
1851
+
1852
+ if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
1853
+ /* Exit the "emit flint" workflow. */
1854
+ if (s->flint_ == BROTLI_FLINT_WAITING_FOR_FLUSHING) {
1855
+ CheckFlushComplete(s);
1856
+ if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
1857
+ s->flint_ = BROTLI_FLINT_DONE;
1858
+ }
1859
+ }
1860
+ continue;
1861
+ }
1862
+
1863
+ /* Compress data only when internal output buffer is empty, stream is not
1864
+ finished and there is no pending flush request. */
1865
+ if (s->available_out_ == 0 &&
1866
+ s->stream_state_ == BROTLI_STREAM_PROCESSING) {
1867
+ if (remaining_block_size == 0 || op != BROTLI_OPERATION_PROCESS) {
1868
+ BROTLI_BOOL is_last = TO_BROTLI_BOOL(
1869
+ (*available_in == 0) && op == BROTLI_OPERATION_FINISH);
1870
+ BROTLI_BOOL force_flush = TO_BROTLI_BOOL(
1871
+ (*available_in == 0) && op == BROTLI_OPERATION_FLUSH);
1872
+ BROTLI_BOOL result;
1873
+ /* Force emitting (uncompressed) piece containing flint. */
1874
+ if (!is_last && s->flint_ == 0) {
1875
+ s->flint_ = BROTLI_FLINT_WAITING_FOR_FLUSHING;
1876
+ force_flush = BROTLI_TRUE;
1877
+ }
1878
+ UpdateSizeHint(s, *available_in);
1879
+ result = EncodeData(s, is_last, force_flush,
1880
+ &s->available_out_, &s->next_out_);
1881
+ if (!result) return BROTLI_FALSE;
1882
+ if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
1883
+ if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
1884
+ continue;
1885
+ }
1886
+ }
1887
+ break;
1888
+ }
1889
+ CheckFlushComplete(s);
1890
+ return BROTLI_TRUE;
1891
+ }
1892
+
1893
+ BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* s) {
1894
+ return TO_BROTLI_BOOL(s->stream_state_ == BROTLI_STREAM_FINISHED &&
1895
+ !BrotliEncoderHasMoreOutput(s));
1896
+ }
1897
+
1898
+ BROTLI_BOOL BrotliEncoderHasMoreOutput(BrotliEncoderState* s) {
1899
+ return TO_BROTLI_BOOL(s->available_out_ != 0);
1900
+ }
1901
+
1902
+ const uint8_t* BrotliEncoderTakeOutput(BrotliEncoderState* s, size_t* size) {
1903
+ size_t consumed_size = s->available_out_;
1904
+ uint8_t* result = s->next_out_;
1905
+ if (*size) {
1906
+ consumed_size = BROTLI_MIN(size_t, *size, s->available_out_);
1907
+ }
1908
+ if (consumed_size) {
1909
+ s->next_out_ += consumed_size;
1910
+ s->available_out_ -= consumed_size;
1911
+ s->total_out_ += consumed_size;
1912
+ CheckFlushComplete(s);
1913
+ *size = consumed_size;
1914
+ } else {
1915
+ *size = 0;
1916
+ result = 0;
1917
+ }
1918
+ return result;
1919
+ }
1920
+
1921
+ uint32_t BrotliEncoderVersion(void) {
1922
+ return BROTLI_VERSION;
1923
+ }
1924
+
1925
+ #if defined(__cplusplus) || defined(c_plusplus)
1926
+ } /* extern "C" */
1927
+ #endif