brotli 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (93) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/main.yml +6 -3
  3. data/.github/workflows/publish.yml +7 -17
  4. data/.gitmodules +1 -1
  5. data/README.md +2 -2
  6. data/ext/brotli/brotli.c +1 -0
  7. data/ext/brotli/extconf.rb +6 -0
  8. data/lib/brotli/version.rb +1 -1
  9. data/test/brotli_test.rb +4 -1
  10. data/vendor/brotli/c/common/constants.c +1 -1
  11. data/vendor/brotli/c/common/constants.h +2 -1
  12. data/vendor/brotli/c/common/context.c +1 -1
  13. data/vendor/brotli/c/common/dictionary.c +5 -3
  14. data/vendor/brotli/c/common/platform.c +2 -1
  15. data/vendor/brotli/c/common/platform.h +60 -113
  16. data/vendor/brotli/c/common/shared_dictionary.c +521 -0
  17. data/vendor/brotli/c/common/shared_dictionary_internal.h +75 -0
  18. data/vendor/brotli/c/common/transform.c +1 -1
  19. data/vendor/brotli/c/common/version.h +31 -6
  20. data/vendor/brotli/c/dec/bit_reader.c +10 -8
  21. data/vendor/brotli/c/dec/bit_reader.h +172 -100
  22. data/vendor/brotli/c/dec/decode.c +467 -200
  23. data/vendor/brotli/c/dec/huffman.c +7 -4
  24. data/vendor/brotli/c/dec/huffman.h +2 -1
  25. data/vendor/brotli/c/dec/prefix.h +2 -1
  26. data/vendor/brotli/c/dec/state.c +33 -9
  27. data/vendor/brotli/c/dec/state.h +70 -35
  28. data/vendor/brotli/c/enc/backward_references.c +81 -19
  29. data/vendor/brotli/c/enc/backward_references.h +5 -4
  30. data/vendor/brotli/c/enc/backward_references_hq.c +148 -52
  31. data/vendor/brotli/c/enc/backward_references_hq.h +6 -5
  32. data/vendor/brotli/c/enc/backward_references_inc.h +31 -5
  33. data/vendor/brotli/c/enc/bit_cost.c +8 -7
  34. data/vendor/brotli/c/enc/bit_cost.h +5 -4
  35. data/vendor/brotli/c/enc/block_splitter.c +37 -14
  36. data/vendor/brotli/c/enc/block_splitter.h +5 -4
  37. data/vendor/brotli/c/enc/block_splitter_inc.h +86 -45
  38. data/vendor/brotli/c/enc/brotli_bit_stream.c +132 -110
  39. data/vendor/brotli/c/enc/brotli_bit_stream.h +11 -6
  40. data/vendor/brotli/c/enc/cluster.c +10 -9
  41. data/vendor/brotli/c/enc/cluster.h +7 -6
  42. data/vendor/brotli/c/enc/cluster_inc.h +25 -20
  43. data/vendor/brotli/c/enc/command.c +1 -1
  44. data/vendor/brotli/c/enc/command.h +5 -4
  45. data/vendor/brotli/c/enc/compound_dictionary.c +207 -0
  46. data/vendor/brotli/c/enc/compound_dictionary.h +74 -0
  47. data/vendor/brotli/c/enc/compress_fragment.c +93 -83
  48. data/vendor/brotli/c/enc/compress_fragment.h +32 -7
  49. data/vendor/brotli/c/enc/compress_fragment_two_pass.c +99 -87
  50. data/vendor/brotli/c/enc/compress_fragment_two_pass.h +21 -3
  51. data/vendor/brotli/c/enc/dictionary_hash.c +3 -1
  52. data/vendor/brotli/c/enc/encode.c +473 -404
  53. data/vendor/brotli/c/enc/encoder_dict.c +611 -4
  54. data/vendor/brotli/c/enc/encoder_dict.h +117 -3
  55. data/vendor/brotli/c/enc/entropy_encode.c +3 -2
  56. data/vendor/brotli/c/enc/entropy_encode.h +2 -1
  57. data/vendor/brotli/c/enc/entropy_encode_static.h +5 -2
  58. data/vendor/brotli/c/enc/fast_log.c +1 -1
  59. data/vendor/brotli/c/enc/fast_log.h +2 -1
  60. data/vendor/brotli/c/enc/find_match_length.h +15 -22
  61. data/vendor/brotli/c/enc/hash.h +285 -45
  62. data/vendor/brotli/c/enc/hash_composite_inc.h +26 -11
  63. data/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +20 -18
  64. data/vendor/brotli/c/enc/hash_longest_match64_inc.h +34 -39
  65. data/vendor/brotli/c/enc/hash_longest_match_inc.h +6 -10
  66. data/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +4 -4
  67. data/vendor/brotli/c/enc/hash_rolling_inc.h +4 -4
  68. data/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +6 -5
  69. data/vendor/brotli/c/enc/histogram.c +4 -4
  70. data/vendor/brotli/c/enc/histogram.h +7 -6
  71. data/vendor/brotli/c/enc/literal_cost.c +20 -15
  72. data/vendor/brotli/c/enc/literal_cost.h +4 -2
  73. data/vendor/brotli/c/enc/memory.c +29 -5
  74. data/vendor/brotli/c/enc/memory.h +19 -2
  75. data/vendor/brotli/c/enc/metablock.c +72 -58
  76. data/vendor/brotli/c/enc/metablock.h +9 -8
  77. data/vendor/brotli/c/enc/metablock_inc.h +8 -6
  78. data/vendor/brotli/c/enc/params.h +4 -3
  79. data/vendor/brotli/c/enc/prefix.h +3 -2
  80. data/vendor/brotli/c/enc/quality.h +40 -3
  81. data/vendor/brotli/c/enc/ringbuffer.h +4 -3
  82. data/vendor/brotli/c/enc/state.h +104 -0
  83. data/vendor/brotli/c/enc/static_dict.c +60 -4
  84. data/vendor/brotli/c/enc/static_dict.h +3 -2
  85. data/vendor/brotli/c/enc/static_dict_lut.h +2 -0
  86. data/vendor/brotli/c/enc/utf8_util.c +1 -1
  87. data/vendor/brotli/c/enc/utf8_util.h +2 -1
  88. data/vendor/brotli/c/enc/write_bits.h +2 -1
  89. data/vendor/brotli/c/include/brotli/decode.h +67 -2
  90. data/vendor/brotli/c/include/brotli/encode.h +55 -2
  91. data/vendor/brotli/c/include/brotli/port.h +28 -11
  92. data/vendor/brotli/c/include/brotli/shared_dictionary.h +100 -0
  93. metadata +9 -3
@@ -4,18 +4,45 @@
4
4
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
5
  */
6
6
 
7
- #include "./encoder_dict.h"
7
+ #include "encoder_dict.h"
8
+
9
+ #include <stdlib.h> /* malloc, free */
8
10
 
9
11
  #include "../common/dictionary.h"
12
+ #include "../common/platform.h"
13
+ #include "../common/shared_dictionary_internal.h"
10
14
  #include "../common/transform.h"
11
- #include "./dictionary_hash.h"
12
- #include "./hash.h"
15
+ #include "compound_dictionary.h"
16
+ #include "dictionary_hash.h"
17
+ #include "memory.h"
18
+ #include "quality.h"
19
+ #include "hash.h"
13
20
 
14
21
  #if defined(__cplusplus) || defined(c_plusplus)
15
22
  extern "C" {
16
23
  #endif
17
24
 
18
- void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict) {
25
+ #define NUM_HASH_BITS 15u
26
+ #define NUM_HASH_BUCKETS (1u << NUM_HASH_BITS)
27
+
28
+ static void BrotliTrieInit(BrotliTrie* trie) {
29
+ trie->pool_capacity = 0;
30
+ trie->pool_size = 0;
31
+ trie->pool = 0;
32
+
33
+ /* Set up the root node */
34
+ trie->root.single = 0;
35
+ trie->root.len_ = 0;
36
+ trie->root.idx_ = 0;
37
+ trie->root.sub = 0;
38
+ }
39
+
40
+ static void BrotliTrieFree(MemoryManager* m, BrotliTrie* trie) {
41
+ BrotliFree(m, trie->pool);
42
+ }
43
+
44
+ /* Initializes to RFC 7932 static dictionary / transforms. */
45
+ static void InitEncoderDictionary(BrotliEncoderDictionary* dict) {
19
46
  dict->words = BrotliGetDictionary();
20
47
  dict->num_transforms = (uint32_t)BrotliGetTransforms()->num_transforms;
21
48
 
@@ -26,8 +53,588 @@ void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict) {
26
53
 
27
54
  dict->cutoffTransformsCount = kCutoffTransformsCount;
28
55
  dict->cutoffTransforms = kCutoffTransforms;
56
+
57
+ dict->parent = 0;
58
+
59
+ dict->hash_table_data_words_ = 0;
60
+ dict->hash_table_data_lengths_ = 0;
61
+ dict->buckets_alloc_size_ = 0;
62
+ dict->buckets_data_ = 0;
63
+ dict->dict_words_alloc_size_ = 0;
64
+ dict->dict_words_data_ = 0;
65
+ dict->words_instance_ = 0;
66
+ dict->has_words_heavy = BROTLI_FALSE;
67
+ BrotliTrieInit(&dict->trie);
68
+ }
69
+
70
+ static void BrotliDestroyEncoderDictionary(MemoryManager* m,
71
+ BrotliEncoderDictionary* dict) {
72
+ BrotliFree(m, dict->hash_table_data_words_);
73
+ BrotliFree(m, dict->hash_table_data_lengths_);
74
+ BrotliFree(m, dict->buckets_data_);
75
+ BrotliFree(m, dict->dict_words_data_);
76
+ BrotliFree(m, dict->words_instance_);
77
+ BrotliTrieFree(m, &dict->trie);
78
+ }
79
+
80
+ #if defined(BROTLI_EXPERIMENTAL)
81
+ /* Word length must be at least 4 bytes */
82
+ static uint32_t Hash(const uint8_t* data, int bits) {
83
+ uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
84
+ /* The higher bits contain more mixture from the multiplication,
85
+ so we take our results from there. */
86
+ return h >> (32 - bits);
87
+ }
88
+
89
+ /* Theoretical max possible word size after transform */
90
+ #define kTransformedBufferSize \
91
+ (256 + 256 + SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH)
92
+
93
+ /* To be safe buffer must have at least kTransformedBufferSize */
94
+ static void TransformedDictionaryWord(uint32_t word_idx, int len, int transform,
95
+ const BrotliTransforms* transforms,
96
+ const BrotliEncoderDictionary* dict,
97
+ uint8_t* buffer, size_t* size) {
98
+ const uint8_t* dict_word = &dict->words->data[
99
+ dict->words->offsets_by_length[len] + (uint32_t)len * word_idx];
100
+ *size = (size_t)BrotliTransformDictionaryWord(buffer, dict_word, len,
101
+ transforms, transform);
102
+ }
103
+
104
+ static DictWord MakeDictWord(uint8_t len, uint8_t transform, uint16_t idx) {
105
+ DictWord result;
106
+ result.len = len;
107
+ result.transform = transform;
108
+ result.idx = idx;
109
+ return result;
110
+ }
111
+
112
+ static uint32_t BrotliTrieAlloc(MemoryManager* m, size_t num, BrotliTrie* trie,
113
+ BrotliTrieNode** keep) {
114
+ uint32_t result;
115
+ uint32_t keep_index = 0;
116
+ if (keep && *keep != &trie->root) {
117
+ /* Optional node to keep, since address may change after re-allocating */
118
+ keep_index = (uint32_t)(*keep - trie->pool);
119
+ }
120
+ if (trie->pool_size == 0) {
121
+ /* Have a dummy node in the front. We do not want the result to be 0, it
122
+ must be at least 1, 0 represents "null pointer" */
123
+ trie->pool_size = 1;
124
+ }
125
+ BROTLI_ENSURE_CAPACITY(m, BrotliTrieNode, trie->pool, trie->pool_capacity,
126
+ trie->pool_size + num);
127
+ if (BROTLI_IS_OOM(m)) return 0;
128
+ /* Init the new nodes to empty */
129
+ memset(trie->pool + trie->pool_size, 0, sizeof(*trie->pool) * num);
130
+ result = (uint32_t)trie->pool_size;
131
+ trie->pool_size += num;
132
+ if (keep && *keep != &trie->root) {
133
+ *keep = trie->pool + keep_index;
134
+ }
135
+ return result;
136
+ }
137
+
138
+ /**
139
+ * len and idx: payload for last node
140
+ * word, size: the string
141
+ * index: position in the string
142
+ */
143
+ static BROTLI_BOOL BrotliTrieNodeAdd(MemoryManager* m, uint8_t len,
144
+ uint32_t idx, const uint8_t* word, size_t size, int index,
145
+ BrotliTrieNode* node, BrotliTrie* trie) {
146
+ BrotliTrieNode* child = 0;
147
+ uint8_t c;
148
+ if ((size_t)index == size) {
149
+ if (!node->len_ || idx < node->idx_) {
150
+ node->len_ = len;
151
+ node->idx_ = idx;
152
+ }
153
+ return BROTLI_TRUE;
154
+ }
155
+ c = word[index];
156
+ if (node->single && c != node->c) {
157
+ BrotliTrieNode old = trie->pool[node->sub];
158
+ uint32_t new_nodes = BrotliTrieAlloc(m, 32, trie, &node);
159
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
160
+ node->single = 0;
161
+ node->sub = new_nodes;
162
+ trie->pool[node->sub + (node->c >> 4)].sub = new_nodes + 16;
163
+ trie->pool[trie->pool[node->sub + (node->c >> 4)].sub + (node->c & 15)] =
164
+ old;
165
+ }
166
+ if (!node->sub) {
167
+ uint32_t new_node = BrotliTrieAlloc(m, 1, trie, &node);
168
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
169
+ node->single = 1;
170
+ node->c = c;
171
+ node->sub = new_node;
172
+ }
173
+ if (node->single) {
174
+ child = &trie->pool[node->sub];
175
+ } else {
176
+ if (!trie->pool[node->sub + (c >> 4)].sub) {
177
+ uint32_t new_nodes = BrotliTrieAlloc(m, 16, trie, &node);
178
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
179
+ trie->pool[node->sub + (c >> 4)].sub = new_nodes;
180
+ }
181
+ child = &trie->pool[trie->pool[node->sub + (c >> 4)].sub + (c & 15)];
182
+ }
183
+ return BrotliTrieNodeAdd(m, len, idx, word, size, index + 1, child, trie);
184
+ }
185
+
186
+ static BROTLI_BOOL BrotliTrieAdd(MemoryManager* m, uint8_t len, uint32_t idx,
187
+ const uint8_t* word, size_t size, BrotliTrie* trie) {
188
+ return BrotliTrieNodeAdd(m, len, idx, word, size, 0, &trie->root, trie);
189
+ }
190
+
191
+ const BrotliTrieNode* BrotliTrieSub(const BrotliTrie* trie,
192
+ const BrotliTrieNode* node, uint8_t c) {
193
+ BrotliTrieNode* temp_node;
194
+ if (node->single) {
195
+ if (node->c == c) return &trie->pool[node->sub];
196
+ return 0;
197
+ }
198
+ if (!node->sub) return 0;
199
+ temp_node = &trie->pool[node->sub + (c >> 4)];
200
+ if (!temp_node->sub) return 0;
201
+ return &trie->pool[temp_node->sub + (c & 15)];
202
+ }
203
+
204
+ static const BrotliTrieNode* BrotliTrieFind(const BrotliTrie* trie,
205
+ const uint8_t* word, size_t size) {
206
+ const BrotliTrieNode* node = &trie->root;
207
+ size_t i;
208
+ for (i = 0; i < size; i++) {
209
+ node = BrotliTrieSub(trie, node, word[i]);
210
+ if (!node) return 0;
211
+ }
212
+ return node;
213
+ }
214
+
215
+ static BROTLI_BOOL BuildDictionaryLut(MemoryManager* m,
216
+ const BrotliTransforms* transforms,
217
+ BrotliEncoderDictionary* dict) {
218
+ uint32_t i;
219
+ DictWord* dict_words;
220
+ uint16_t* buckets;
221
+ DictWord** words_by_hash;
222
+ size_t* words_by_hash_size;
223
+ size_t* words_by_hash_capacity;
224
+ BrotliTrie dedup;
225
+ uint8_t word[kTransformedBufferSize];
226
+ size_t word_size;
227
+ size_t total = 0;
228
+ uint8_t l;
229
+ uint16_t idx;
230
+
231
+ BrotliTrieInit(&dedup);
232
+
233
+ words_by_hash = (DictWord**)BrotliAllocate(m,
234
+ sizeof(*words_by_hash) * NUM_HASH_BUCKETS);
235
+ words_by_hash_size = (size_t*)BrotliAllocate(m,
236
+ sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS);
237
+ words_by_hash_capacity = (size_t*)BrotliAllocate(m,
238
+ sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS);
239
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
240
+ memset(words_by_hash, 0, sizeof(*words_by_hash) * NUM_HASH_BUCKETS);
241
+ memset(words_by_hash_size, 0, sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS);
242
+ memset(words_by_hash_capacity, 0,
243
+ sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS);
244
+
245
+ if (transforms->num_transforms > 0) {
246
+ for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
247
+ l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) {
248
+ uint16_t n = dict->words->size_bits_by_length[l] ?
249
+ (uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u;
250
+ for (idx = 0; idx < n; ++idx) {
251
+ uint32_t key;
252
+ /* First transform (usually identity) */
253
+ TransformedDictionaryWord(idx, l, 0, transforms, dict, word,
254
+ &word_size);
255
+ /* Cannot hash words smaller than 4 bytes */
256
+ if (word_size < 4) {
257
+ /* Break instead of continue, all next words of this length will have
258
+ same length after transform */
259
+ break;
260
+ }
261
+ if (!BrotliTrieAdd(m, 0, idx, word, word_size, &dedup)) {
262
+ return BROTLI_FALSE;
263
+ }
264
+ key = Hash(word, NUM_HASH_BITS);
265
+ BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key],
266
+ words_by_hash_capacity[key], words_by_hash_size[key],
267
+ MakeDictWord(l, 0, idx));
268
+ ++total;
269
+ }
270
+ }
271
+ }
272
+
273
+ /* These LUT transforms only supported if no custom transforms. This is
274
+ ok, we will use the heavy trie instead. */
275
+ if (transforms == BrotliGetTransforms()) {
276
+ for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
277
+ l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) {
278
+ uint16_t n = dict->words->size_bits_by_length[l] ?
279
+ (uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u;
280
+ for (idx = 0; idx < n; ++idx) {
281
+ int k;
282
+ BROTLI_BOOL is_ascii = BROTLI_TRUE;
283
+ size_t offset = dict->words->offsets_by_length[l] + (size_t)l * idx;
284
+ const uint8_t* data = &dict->words->data[offset];
285
+ for (k = 0; k < l; ++k) {
286
+ if (data[k] >= 128) is_ascii = BROTLI_FALSE;
287
+ }
288
+ if (data[0] < 128) {
289
+ int transform = 9; /* {empty, uppercase first, empty} */
290
+ uint32_t ix = idx + (uint32_t)transform * n;
291
+ const BrotliTrieNode* it;
292
+ TransformedDictionaryWord(idx, l, transform, transforms,
293
+ dict, word, &word_size);
294
+ it = BrotliTrieFind(&dedup, word, word_size);
295
+ if (!it || it->idx_ > ix) {
296
+ uint32_t key = Hash(word, NUM_HASH_BITS);
297
+ if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) {
298
+ return BROTLI_FALSE;
299
+ }
300
+ BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key],
301
+ words_by_hash_capacity[key], words_by_hash_size[key],
302
+ MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_FIRST, idx));
303
+ ++total;
304
+ }
305
+ }
306
+ if (is_ascii) {
307
+ int transform = 44; /* {empty, uppercase all, empty} */
308
+ uint32_t ix = idx + (uint32_t)transform * n;
309
+ const BrotliTrieNode* it;
310
+ TransformedDictionaryWord(idx, l, transform, transforms,
311
+ dict, word, &word_size);
312
+ it = BrotliTrieFind(&dedup, word, word_size);
313
+ if (!it || it->idx_ > ix) {
314
+ uint32_t key = Hash(word, NUM_HASH_BITS);
315
+ if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) {
316
+ return BROTLI_FALSE;
317
+ }
318
+ BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key],
319
+ words_by_hash_capacity[key], words_by_hash_size[key],
320
+ MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_ALL, idx));
321
+ ++total;
322
+ }
323
+ }
324
+ }
325
+ }
326
+ }
327
+
328
+ dict_words = (DictWord*)BrotliAllocate(m,
329
+ sizeof(*dict->dict_words) * (total + 1));
330
+ buckets = (uint16_t*)BrotliAllocate(m,
331
+ sizeof(*dict->buckets) * NUM_HASH_BUCKETS);
332
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
333
+ dict->dict_words_alloc_size_ = total + 1;
334
+ dict->dict_words = dict->dict_words_data_ = dict_words;
335
+ dict->buckets_alloc_size_ = NUM_HASH_BUCKETS;
336
+ dict->buckets = dict->buckets_data_ = buckets;
337
+
338
+ /* Unused; makes offsets start from 1. */
339
+ dict_words[0] = MakeDictWord(0, 0, 0);
340
+ total = 1;
341
+ for (i = 0; i < NUM_HASH_BUCKETS; ++i) {
342
+ size_t num_words = words_by_hash_size[i];
343
+ if (num_words > 0) {
344
+ buckets[i] = (uint16_t)(total);
345
+ memcpy(&dict_words[total], &words_by_hash[i][0],
346
+ sizeof(dict_words[0]) * num_words);
347
+ total += num_words;
348
+ dict_words[total - 1].len |= 0x80;
349
+ } else {
350
+ buckets[i] = 0;
351
+ }
352
+ }
353
+
354
+ for (i = 0; i < NUM_HASH_BUCKETS; ++i) {
355
+ BrotliFree(m, words_by_hash[i]);
356
+ }
357
+ BrotliFree(m, words_by_hash);
358
+ BrotliFree(m, words_by_hash_size);
359
+ BrotliFree(m, words_by_hash_capacity);
360
+ BrotliTrieFree(m, &dedup);
361
+
362
+ return BROTLI_TRUE;
363
+ }
364
+
365
+ static void BuildDictionaryHashTable(uint16_t* hash_table_words,
366
+ uint8_t* hash_table_lengths, const BrotliDictionary* dict) {
367
+ int j, len;
368
+ /* The order of the loops is such that in case of collision, words with
369
+ shorter length are preferred, and in case of same length, words with
370
+ smaller index. There is only a single word per bucket. */
371
+ /* TODO(lode): consider adding optional user-supplied frequency_map to use
372
+ for preferred words instead, this can make the encoder better for
373
+ quality 9 and below without affecting the decoder */
374
+ memset(hash_table_words, 0, sizeof(kStaticDictionaryHashWords));
375
+ memset(hash_table_lengths, 0, sizeof(kStaticDictionaryHashLengths));
376
+ for (len = SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH;
377
+ len >= SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; --len) {
378
+ const size_t num_words = dict->size_bits_by_length[len] ?
379
+ (1u << dict->size_bits_by_length[len]) : 0;
380
+ for (j = (int)num_words - 1; j >= 0; --j) {
381
+ size_t offset = dict->offsets_by_length[len] +
382
+ (size_t)len * (size_t)j;
383
+ const uint8_t* word = &dict->data[offset];
384
+ const uint32_t key = Hash(word, 14);
385
+ int idx = (int)(key << 1) + (len < 8 ? 1 : 0);
386
+ BROTLI_DCHECK(idx < (int)NUM_HASH_BUCKETS);
387
+ hash_table_words[idx] = (uint16_t)j;
388
+ hash_table_lengths[idx] = (uint8_t)len;
389
+ }
390
+ }
29
391
  }
30
392
 
393
+ static BROTLI_BOOL GenerateWordsHeavy(MemoryManager* m,
394
+ const BrotliTransforms* transforms,
395
+ BrotliEncoderDictionary* dict) {
396
+ int i, j, l;
397
+ for (j = (int)transforms->num_transforms - 1; j >= 0 ; --j) {
398
+ for (l = 0; l < 32; l++) {
399
+ int num = (int)((1u << dict->words->size_bits_by_length[l]) & ~1u);
400
+ for (i = 0; i < num; i++) {
401
+ uint8_t transformed[kTransformedBufferSize];
402
+ size_t size;
403
+ TransformedDictionaryWord(
404
+ (uint32_t)i, l, j, transforms, dict, transformed, &size);
405
+ if (size < 4) continue;
406
+ if (!BrotliTrieAdd(m, (uint8_t)l, (uint32_t)(i + num * j),
407
+ transformed, size, &dict->trie)) {
408
+ return BROTLI_FALSE;
409
+ }
410
+ }
411
+ }
412
+ }
413
+ return BROTLI_TRUE;
414
+ }
415
+
416
+ /* Computes cutoffTransformsCount (in count) and cutoffTransforms (in data) for
417
+ the custom transforms, where possible within the limits of the
418
+ cutoffTransforms encoding. The fast encoder uses this to do fast lookup for
419
+ transforms that remove the N last characters (OmitLast). */
420
+ static void ComputeCutoffTransforms(
421
+ const BrotliTransforms* transforms,
422
+ uint32_t* count, uint64_t* data) {
423
+ int i;
424
+ /* The encoding in a 64-bit integer of transform N in the data is: (N << 2) +
425
+ ((cutoffTransforms >> (N * 6)) & 0x3F), so for example the identity
426
+ transform code must be 0-63, for N=1 the transform code must be 4-67, ...,
427
+ for N=9 it must be 36-99.
428
+ TODO(lode): consider a simple flexible uint8_t[10] instead of the uint64_t
429
+ for the cutoff transforms, so that shared dictionaries can have the
430
+ OmitLast transforms anywhere without loss. */
431
+ *count = 0;
432
+ *data = 0;
433
+ for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) {
434
+ int idx = transforms->cutOffTransforms[i];
435
+ if (idx == -1) break; /* Not found */
436
+ if (idx < (i << 2)) break; /* Too small for the encoding */
437
+ if (idx >= (i << 2) + 64) break; /* Too large for the encoding */
438
+ (*count)++;
439
+ *data |= (uint64_t)(((uint64_t)idx -
440
+ ((uint64_t)i << 2u)) << ((uint64_t)i * 6u));
441
+ }
442
+ }
443
+
444
+ static BROTLI_BOOL ComputeDictionary(MemoryManager* m, int quality,
445
+ const BrotliTransforms* transforms,
446
+ BrotliEncoderDictionary* current) {
447
+ int default_words = current->words == BrotliGetDictionary();
448
+ int default_transforms = transforms == BrotliGetTransforms();
449
+
450
+ if (default_words && default_transforms) {
451
+ /* hashes are already set to Brotli defaults */
452
+ return BROTLI_TRUE;
453
+ }
454
+
455
+ current->hash_table_data_words_ = (uint16_t*)BrotliAllocate(
456
+ m, sizeof(kStaticDictionaryHashWords));
457
+ current->hash_table_data_lengths_ = (uint8_t*)BrotliAllocate(
458
+ m, sizeof(kStaticDictionaryHashLengths));
459
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
460
+ current->hash_table_words = current->hash_table_data_words_;
461
+ current->hash_table_lengths = current->hash_table_data_lengths_;
462
+
463
+ BuildDictionaryHashTable(current->hash_table_data_words_,
464
+ current->hash_table_data_lengths_, current->words);
465
+
466
+ ComputeCutoffTransforms(transforms,
467
+ &current->cutoffTransformsCount, &current->cutoffTransforms);
468
+
469
+ /* Only compute the data for slow encoder if the requested quality is high
470
+ enough to need it */
471
+ if (quality >= ZOPFLIFICATION_QUALITY) {
472
+ if (!BuildDictionaryLut(m, transforms, current)) return BROTLI_FALSE;
473
+
474
+ /* For the built-in Brotli transforms, there is a hard-coded function to
475
+ handle all transforms, but for custom transforms, we use the following
476
+ large hammer instead */
477
+ current->has_words_heavy = !default_transforms;
478
+ if (current->has_words_heavy) {
479
+ if (!GenerateWordsHeavy(m, transforms, current)) return BROTLI_FALSE;
480
+ }
481
+ }
482
+
483
+ return BROTLI_TRUE;
484
+ }
485
+ #endif /* BROTLI_EXPERIMENTAL */
486
+
487
+ void BrotliInitSharedEncoderDictionary(SharedEncoderDictionary* dict) {
488
+ dict->magic = kSharedDictionaryMagic;
489
+
490
+ dict->compound.num_chunks = 0;
491
+ dict->compound.total_size = 0;
492
+ dict->compound.chunk_offsets[0] = 0;
493
+ dict->compound.num_prepared_instances_ = 0;
494
+
495
+ dict->contextual.context_based = 0;
496
+ dict->contextual.num_dictionaries = 1;
497
+ dict->contextual.instances_ = 0;
498
+ dict->contextual.num_instances_ = 1; /* The instance_ field */
499
+ dict->contextual.dict[0] = &dict->contextual.instance_;
500
+ InitEncoderDictionary(&dict->contextual.instance_);
501
+ dict->contextual.instance_.parent = &dict->contextual;
502
+
503
+ dict->max_quality = BROTLI_MAX_QUALITY;
504
+ }
505
+
506
+ #if defined(BROTLI_EXPERIMENTAL)
507
+ /* TODO(eustas): make sure that tooling will warn user if not all the cutoff
508
+ transforms are available (for low-quality encoder). */
509
+ static BROTLI_BOOL InitCustomSharedEncoderDictionary(
510
+ MemoryManager* m, const BrotliSharedDictionary* decoded_dict,
511
+ int quality, SharedEncoderDictionary* dict) {
512
+ ContextualEncoderDictionary* contextual;
513
+ CompoundDictionary* compound;
514
+ BrotliEncoderDictionary* instances;
515
+ int i;
516
+ BrotliInitSharedEncoderDictionary(dict);
517
+
518
+ contextual = &dict->contextual;
519
+ compound = &dict->compound;
520
+
521
+ for (i = 0; i < (int)decoded_dict->num_prefix; i++) {
522
+ PreparedDictionary* prepared = CreatePreparedDictionary(m,
523
+ decoded_dict->prefix[i], decoded_dict->prefix_size[i]);
524
+ AttachPreparedDictionary(compound, prepared);
525
+ /* remember for cleanup */
526
+ compound->prepared_instances_[
527
+ compound->num_prepared_instances_++] = prepared;
528
+ }
529
+
530
+ dict->max_quality = quality;
531
+ contextual->context_based = decoded_dict->context_based;
532
+ if (decoded_dict->context_based) {
533
+ memcpy(contextual->context_map, decoded_dict->context_map,
534
+ SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS);
535
+ }
536
+
537
+ contextual->num_dictionaries = decoded_dict->num_dictionaries;
538
+ contextual->num_instances_ = decoded_dict->num_dictionaries;
539
+ if (contextual->num_instances_ == 1) {
540
+ instances = &contextual->instance_;
541
+ } else {
542
+ contextual->instances_ = (BrotliEncoderDictionary*)
543
+ BrotliAllocate(m, sizeof(*contextual->instances_) *
544
+ contextual->num_instances_);
545
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
546
+ instances = contextual->instances_;
547
+ }
548
+ for (i = 0; i < (int)contextual->num_instances_; i++) {
549
+ BrotliEncoderDictionary* current = &instances[i];
550
+ InitEncoderDictionary(current);
551
+ current->parent = &dict->contextual;
552
+ if (decoded_dict->words[i] == BrotliGetDictionary()) {
553
+ current->words = BrotliGetDictionary();
554
+ } else {
555
+ current->words_instance_ = (BrotliDictionary*)BrotliAllocate(
556
+ m, sizeof(BrotliDictionary));
557
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
558
+ *current->words_instance_ = *decoded_dict->words[i];
559
+ current->words = current->words_instance_;
560
+ }
561
+ current->num_transforms =
562
+ (uint32_t)decoded_dict->transforms[i]->num_transforms;
563
+ if (!ComputeDictionary(
564
+ m, quality, decoded_dict->transforms[i], current)) {
565
+ return BROTLI_FALSE;
566
+ }
567
+
568
+ contextual->dict[i] = current;
569
+ }
570
+
571
+ return BROTLI_TRUE; /* success */
572
+ }
573
+
574
+ BROTLI_BOOL BrotliInitCustomSharedEncoderDictionary(
575
+ MemoryManager* m, const uint8_t* encoded_dict, size_t size,
576
+ int quality, SharedEncoderDictionary* dict) {
577
+ BROTLI_BOOL success = BROTLI_FALSE;
578
+ BrotliSharedDictionary* decoded_dict = BrotliSharedDictionaryCreateInstance(
579
+ m->alloc_func, m->free_func, m->opaque);
580
+ if (!decoded_dict) { /* OOM */
581
+ return BROTLI_FALSE;
582
+ }
583
+ success = BrotliSharedDictionaryAttach(
584
+ decoded_dict, BROTLI_SHARED_DICTIONARY_SERIALIZED, size, encoded_dict);
585
+ if (success) {
586
+ success = InitCustomSharedEncoderDictionary(m,
587
+ decoded_dict, quality, dict);
588
+ }
589
+ BrotliSharedDictionaryDestroyInstance(decoded_dict);
590
+ return success;
591
+ }
592
+ #endif /* BROTLI_EXPERIMENTAL */
593
+
594
+ void BrotliCleanupSharedEncoderDictionary(MemoryManager* m,
595
+ SharedEncoderDictionary* dict) {
596
+ size_t i;
597
+ for (i = 0; i < dict->compound.num_prepared_instances_; i++) {
598
+ DestroyPreparedDictionary(m,
599
+ (PreparedDictionary*)dict->compound.prepared_instances_[i]);
600
+ }
601
+ if (dict->contextual.num_instances_ == 1) {
602
+ BrotliDestroyEncoderDictionary(m, &dict->contextual.instance_);
603
+ } else if (dict->contextual.num_instances_ > 1) {
604
+ for (i = 0; i < dict->contextual.num_instances_; i++) {
605
+ BrotliDestroyEncoderDictionary(m, &dict->contextual.instances_[i]);
606
+ }
607
+ BrotliFree(m, dict->contextual.instances_);
608
+ }
609
+ }
610
+
611
+ ManagedDictionary* BrotliCreateManagedDictionary(
612
+ brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
613
+ ManagedDictionary* result = (ManagedDictionary*)BrotliBootstrapAlloc(
614
+ sizeof(ManagedDictionary), alloc_func, free_func, opaque);
615
+ if (result == NULL) return NULL;
616
+
617
+ result->magic = kManagedDictionaryMagic;
618
+ BrotliInitMemoryManager(
619
+ &result->memory_manager_, alloc_func, free_func, opaque);
620
+ result->dictionary = NULL;
621
+
622
+ return result;
623
+ }
624
+
625
+ void BrotliDestroyManagedDictionary(ManagedDictionary* dictionary) {
626
+ if (!dictionary) return;
627
+ BrotliBootstrapFree(dictionary, &dictionary->memory_manager_);
628
+ }
629
+
630
+ /* Escalate internal functions visibility; for testing purposes only. */
631
+ #if defined(BROTLI_TEST)
632
+ void InitEncoderDictionaryForTest(BrotliEncoderDictionary*);
633
+ void InitEncoderDictionaryForTest(BrotliEncoderDictionary* d) {
634
+ InitEncoderDictionary(d);
635
+ }
636
+ #endif
637
+
31
638
  #if defined(__cplusplus) || defined(c_plusplus)
32
639
  } /* extern "C" */
33
640
  #endif