brotli 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +6 -3
- data/.github/workflows/publish.yml +7 -17
- data/.gitmodules +1 -1
- data/README.md +2 -2
- data/ext/brotli/brotli.c +1 -0
- data/ext/brotli/extconf.rb +6 -0
- data/lib/brotli/version.rb +1 -1
- data/test/brotli_test.rb +4 -1
- data/vendor/brotli/c/common/constants.c +1 -1
- data/vendor/brotli/c/common/constants.h +2 -1
- data/vendor/brotli/c/common/context.c +1 -1
- data/vendor/brotli/c/common/dictionary.c +5 -3
- data/vendor/brotli/c/common/platform.c +2 -1
- data/vendor/brotli/c/common/platform.h +60 -113
- data/vendor/brotli/c/common/shared_dictionary.c +521 -0
- data/vendor/brotli/c/common/shared_dictionary_internal.h +75 -0
- data/vendor/brotli/c/common/transform.c +1 -1
- data/vendor/brotli/c/common/version.h +31 -6
- data/vendor/brotli/c/dec/bit_reader.c +10 -8
- data/vendor/brotli/c/dec/bit_reader.h +172 -100
- data/vendor/brotli/c/dec/decode.c +467 -200
- data/vendor/brotli/c/dec/huffman.c +7 -4
- data/vendor/brotli/c/dec/huffman.h +2 -1
- data/vendor/brotli/c/dec/prefix.h +2 -1
- data/vendor/brotli/c/dec/state.c +33 -9
- data/vendor/brotli/c/dec/state.h +70 -35
- data/vendor/brotli/c/enc/backward_references.c +81 -19
- data/vendor/brotli/c/enc/backward_references.h +5 -4
- data/vendor/brotli/c/enc/backward_references_hq.c +148 -52
- data/vendor/brotli/c/enc/backward_references_hq.h +6 -5
- data/vendor/brotli/c/enc/backward_references_inc.h +31 -5
- data/vendor/brotli/c/enc/bit_cost.c +8 -7
- data/vendor/brotli/c/enc/bit_cost.h +5 -4
- data/vendor/brotli/c/enc/block_splitter.c +37 -14
- data/vendor/brotli/c/enc/block_splitter.h +5 -4
- data/vendor/brotli/c/enc/block_splitter_inc.h +86 -45
- data/vendor/brotli/c/enc/brotli_bit_stream.c +132 -110
- data/vendor/brotli/c/enc/brotli_bit_stream.h +11 -6
- data/vendor/brotli/c/enc/cluster.c +10 -9
- data/vendor/brotli/c/enc/cluster.h +7 -6
- data/vendor/brotli/c/enc/cluster_inc.h +25 -20
- data/vendor/brotli/c/enc/command.c +1 -1
- data/vendor/brotli/c/enc/command.h +5 -4
- data/vendor/brotli/c/enc/compound_dictionary.c +207 -0
- data/vendor/brotli/c/enc/compound_dictionary.h +74 -0
- data/vendor/brotli/c/enc/compress_fragment.c +93 -83
- data/vendor/brotli/c/enc/compress_fragment.h +32 -7
- data/vendor/brotli/c/enc/compress_fragment_two_pass.c +99 -87
- data/vendor/brotli/c/enc/compress_fragment_two_pass.h +21 -3
- data/vendor/brotli/c/enc/dictionary_hash.c +3 -1
- data/vendor/brotli/c/enc/encode.c +473 -404
- data/vendor/brotli/c/enc/encoder_dict.c +611 -4
- data/vendor/brotli/c/enc/encoder_dict.h +117 -3
- data/vendor/brotli/c/enc/entropy_encode.c +3 -2
- data/vendor/brotli/c/enc/entropy_encode.h +2 -1
- data/vendor/brotli/c/enc/entropy_encode_static.h +5 -2
- data/vendor/brotli/c/enc/fast_log.c +1 -1
- data/vendor/brotli/c/enc/fast_log.h +2 -1
- data/vendor/brotli/c/enc/find_match_length.h +15 -22
- data/vendor/brotli/c/enc/hash.h +285 -45
- data/vendor/brotli/c/enc/hash_composite_inc.h +26 -11
- data/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +20 -18
- data/vendor/brotli/c/enc/hash_longest_match64_inc.h +34 -39
- data/vendor/brotli/c/enc/hash_longest_match_inc.h +6 -10
- data/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +4 -4
- data/vendor/brotli/c/enc/hash_rolling_inc.h +4 -4
- data/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +6 -5
- data/vendor/brotli/c/enc/histogram.c +4 -4
- data/vendor/brotli/c/enc/histogram.h +7 -6
- data/vendor/brotli/c/enc/literal_cost.c +20 -15
- data/vendor/brotli/c/enc/literal_cost.h +4 -2
- data/vendor/brotli/c/enc/memory.c +29 -5
- data/vendor/brotli/c/enc/memory.h +19 -2
- data/vendor/brotli/c/enc/metablock.c +72 -58
- data/vendor/brotli/c/enc/metablock.h +9 -8
- data/vendor/brotli/c/enc/metablock_inc.h +8 -6
- data/vendor/brotli/c/enc/params.h +4 -3
- data/vendor/brotli/c/enc/prefix.h +3 -2
- data/vendor/brotli/c/enc/quality.h +40 -3
- data/vendor/brotli/c/enc/ringbuffer.h +4 -3
- data/vendor/brotli/c/enc/state.h +104 -0
- data/vendor/brotli/c/enc/static_dict.c +60 -4
- data/vendor/brotli/c/enc/static_dict.h +3 -2
- data/vendor/brotli/c/enc/static_dict_lut.h +2 -0
- data/vendor/brotli/c/enc/utf8_util.c +1 -1
- data/vendor/brotli/c/enc/utf8_util.h +2 -1
- data/vendor/brotli/c/enc/write_bits.h +2 -1
- data/vendor/brotli/c/include/brotli/decode.h +67 -2
- data/vendor/brotli/c/include/brotli/encode.h +55 -2
- data/vendor/brotli/c/include/brotli/port.h +28 -11
- data/vendor/brotli/c/include/brotli/shared_dictionary.h +100 -0
- metadata +9 -3
@@ -4,18 +4,45 @@
|
|
4
4
|
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
5
|
*/
|
6
6
|
|
7
|
-
#include "
|
7
|
+
#include "encoder_dict.h"
|
8
|
+
|
9
|
+
#include <stdlib.h> /* malloc, free */
|
8
10
|
|
9
11
|
#include "../common/dictionary.h"
|
12
|
+
#include "../common/platform.h"
|
13
|
+
#include "../common/shared_dictionary_internal.h"
|
10
14
|
#include "../common/transform.h"
|
11
|
-
#include "
|
12
|
-
#include "
|
15
|
+
#include "compound_dictionary.h"
|
16
|
+
#include "dictionary_hash.h"
|
17
|
+
#include "memory.h"
|
18
|
+
#include "quality.h"
|
19
|
+
#include "hash.h"
|
13
20
|
|
14
21
|
#if defined(__cplusplus) || defined(c_plusplus)
|
15
22
|
extern "C" {
|
16
23
|
#endif
|
17
24
|
|
18
|
-
|
25
|
+
#define NUM_HASH_BITS 15u
|
26
|
+
#define NUM_HASH_BUCKETS (1u << NUM_HASH_BITS)
|
27
|
+
|
28
|
+
static void BrotliTrieInit(BrotliTrie* trie) {
|
29
|
+
trie->pool_capacity = 0;
|
30
|
+
trie->pool_size = 0;
|
31
|
+
trie->pool = 0;
|
32
|
+
|
33
|
+
/* Set up the root node */
|
34
|
+
trie->root.single = 0;
|
35
|
+
trie->root.len_ = 0;
|
36
|
+
trie->root.idx_ = 0;
|
37
|
+
trie->root.sub = 0;
|
38
|
+
}
|
39
|
+
|
40
|
+
static void BrotliTrieFree(MemoryManager* m, BrotliTrie* trie) {
|
41
|
+
BrotliFree(m, trie->pool);
|
42
|
+
}
|
43
|
+
|
44
|
+
/* Initializes to RFC 7932 static dictionary / transforms. */
|
45
|
+
static void InitEncoderDictionary(BrotliEncoderDictionary* dict) {
|
19
46
|
dict->words = BrotliGetDictionary();
|
20
47
|
dict->num_transforms = (uint32_t)BrotliGetTransforms()->num_transforms;
|
21
48
|
|
@@ -26,8 +53,588 @@ void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict) {
|
|
26
53
|
|
27
54
|
dict->cutoffTransformsCount = kCutoffTransformsCount;
|
28
55
|
dict->cutoffTransforms = kCutoffTransforms;
|
56
|
+
|
57
|
+
dict->parent = 0;
|
58
|
+
|
59
|
+
dict->hash_table_data_words_ = 0;
|
60
|
+
dict->hash_table_data_lengths_ = 0;
|
61
|
+
dict->buckets_alloc_size_ = 0;
|
62
|
+
dict->buckets_data_ = 0;
|
63
|
+
dict->dict_words_alloc_size_ = 0;
|
64
|
+
dict->dict_words_data_ = 0;
|
65
|
+
dict->words_instance_ = 0;
|
66
|
+
dict->has_words_heavy = BROTLI_FALSE;
|
67
|
+
BrotliTrieInit(&dict->trie);
|
68
|
+
}
|
69
|
+
|
70
|
+
static void BrotliDestroyEncoderDictionary(MemoryManager* m,
|
71
|
+
BrotliEncoderDictionary* dict) {
|
72
|
+
BrotliFree(m, dict->hash_table_data_words_);
|
73
|
+
BrotliFree(m, dict->hash_table_data_lengths_);
|
74
|
+
BrotliFree(m, dict->buckets_data_);
|
75
|
+
BrotliFree(m, dict->dict_words_data_);
|
76
|
+
BrotliFree(m, dict->words_instance_);
|
77
|
+
BrotliTrieFree(m, &dict->trie);
|
78
|
+
}
|
79
|
+
|
80
|
+
#if defined(BROTLI_EXPERIMENTAL)
|
81
|
+
/* Word length must be at least 4 bytes */
|
82
|
+
static uint32_t Hash(const uint8_t* data, int bits) {
|
83
|
+
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
|
84
|
+
/* The higher bits contain more mixture from the multiplication,
|
85
|
+
so we take our results from there. */
|
86
|
+
return h >> (32 - bits);
|
87
|
+
}
|
88
|
+
|
89
|
+
/* Theoretical max possible word size after transform */
|
90
|
+
#define kTransformedBufferSize \
|
91
|
+
(256 + 256 + SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH)
|
92
|
+
|
93
|
+
/* To be safe buffer must have at least kTransformedBufferSize */
|
94
|
+
static void TransformedDictionaryWord(uint32_t word_idx, int len, int transform,
|
95
|
+
const BrotliTransforms* transforms,
|
96
|
+
const BrotliEncoderDictionary* dict,
|
97
|
+
uint8_t* buffer, size_t* size) {
|
98
|
+
const uint8_t* dict_word = &dict->words->data[
|
99
|
+
dict->words->offsets_by_length[len] + (uint32_t)len * word_idx];
|
100
|
+
*size = (size_t)BrotliTransformDictionaryWord(buffer, dict_word, len,
|
101
|
+
transforms, transform);
|
102
|
+
}
|
103
|
+
|
104
|
+
static DictWord MakeDictWord(uint8_t len, uint8_t transform, uint16_t idx) {
|
105
|
+
DictWord result;
|
106
|
+
result.len = len;
|
107
|
+
result.transform = transform;
|
108
|
+
result.idx = idx;
|
109
|
+
return result;
|
110
|
+
}
|
111
|
+
|
112
|
+
static uint32_t BrotliTrieAlloc(MemoryManager* m, size_t num, BrotliTrie* trie,
|
113
|
+
BrotliTrieNode** keep) {
|
114
|
+
uint32_t result;
|
115
|
+
uint32_t keep_index = 0;
|
116
|
+
if (keep && *keep != &trie->root) {
|
117
|
+
/* Optional node to keep, since address may change after re-allocating */
|
118
|
+
keep_index = (uint32_t)(*keep - trie->pool);
|
119
|
+
}
|
120
|
+
if (trie->pool_size == 0) {
|
121
|
+
/* Have a dummy node in the front. We do not want the result to be 0, it
|
122
|
+
must be at least 1, 0 represents "null pointer" */
|
123
|
+
trie->pool_size = 1;
|
124
|
+
}
|
125
|
+
BROTLI_ENSURE_CAPACITY(m, BrotliTrieNode, trie->pool, trie->pool_capacity,
|
126
|
+
trie->pool_size + num);
|
127
|
+
if (BROTLI_IS_OOM(m)) return 0;
|
128
|
+
/* Init the new nodes to empty */
|
129
|
+
memset(trie->pool + trie->pool_size, 0, sizeof(*trie->pool) * num);
|
130
|
+
result = (uint32_t)trie->pool_size;
|
131
|
+
trie->pool_size += num;
|
132
|
+
if (keep && *keep != &trie->root) {
|
133
|
+
*keep = trie->pool + keep_index;
|
134
|
+
}
|
135
|
+
return result;
|
136
|
+
}
|
137
|
+
|
138
|
+
/**
|
139
|
+
* len and idx: payload for last node
|
140
|
+
* word, size: the string
|
141
|
+
* index: position in the string
|
142
|
+
*/
|
143
|
+
static BROTLI_BOOL BrotliTrieNodeAdd(MemoryManager* m, uint8_t len,
|
144
|
+
uint32_t idx, const uint8_t* word, size_t size, int index,
|
145
|
+
BrotliTrieNode* node, BrotliTrie* trie) {
|
146
|
+
BrotliTrieNode* child = 0;
|
147
|
+
uint8_t c;
|
148
|
+
if ((size_t)index == size) {
|
149
|
+
if (!node->len_ || idx < node->idx_) {
|
150
|
+
node->len_ = len;
|
151
|
+
node->idx_ = idx;
|
152
|
+
}
|
153
|
+
return BROTLI_TRUE;
|
154
|
+
}
|
155
|
+
c = word[index];
|
156
|
+
if (node->single && c != node->c) {
|
157
|
+
BrotliTrieNode old = trie->pool[node->sub];
|
158
|
+
uint32_t new_nodes = BrotliTrieAlloc(m, 32, trie, &node);
|
159
|
+
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
|
160
|
+
node->single = 0;
|
161
|
+
node->sub = new_nodes;
|
162
|
+
trie->pool[node->sub + (node->c >> 4)].sub = new_nodes + 16;
|
163
|
+
trie->pool[trie->pool[node->sub + (node->c >> 4)].sub + (node->c & 15)] =
|
164
|
+
old;
|
165
|
+
}
|
166
|
+
if (!node->sub) {
|
167
|
+
uint32_t new_node = BrotliTrieAlloc(m, 1, trie, &node);
|
168
|
+
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
|
169
|
+
node->single = 1;
|
170
|
+
node->c = c;
|
171
|
+
node->sub = new_node;
|
172
|
+
}
|
173
|
+
if (node->single) {
|
174
|
+
child = &trie->pool[node->sub];
|
175
|
+
} else {
|
176
|
+
if (!trie->pool[node->sub + (c >> 4)].sub) {
|
177
|
+
uint32_t new_nodes = BrotliTrieAlloc(m, 16, trie, &node);
|
178
|
+
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
|
179
|
+
trie->pool[node->sub + (c >> 4)].sub = new_nodes;
|
180
|
+
}
|
181
|
+
child = &trie->pool[trie->pool[node->sub + (c >> 4)].sub + (c & 15)];
|
182
|
+
}
|
183
|
+
return BrotliTrieNodeAdd(m, len, idx, word, size, index + 1, child, trie);
|
184
|
+
}
|
185
|
+
|
186
|
+
static BROTLI_BOOL BrotliTrieAdd(MemoryManager* m, uint8_t len, uint32_t idx,
|
187
|
+
const uint8_t* word, size_t size, BrotliTrie* trie) {
|
188
|
+
return BrotliTrieNodeAdd(m, len, idx, word, size, 0, &trie->root, trie);
|
189
|
+
}
|
190
|
+
|
191
|
+
const BrotliTrieNode* BrotliTrieSub(const BrotliTrie* trie,
|
192
|
+
const BrotliTrieNode* node, uint8_t c) {
|
193
|
+
BrotliTrieNode* temp_node;
|
194
|
+
if (node->single) {
|
195
|
+
if (node->c == c) return &trie->pool[node->sub];
|
196
|
+
return 0;
|
197
|
+
}
|
198
|
+
if (!node->sub) return 0;
|
199
|
+
temp_node = &trie->pool[node->sub + (c >> 4)];
|
200
|
+
if (!temp_node->sub) return 0;
|
201
|
+
return &trie->pool[temp_node->sub + (c & 15)];
|
202
|
+
}
|
203
|
+
|
204
|
+
static const BrotliTrieNode* BrotliTrieFind(const BrotliTrie* trie,
|
205
|
+
const uint8_t* word, size_t size) {
|
206
|
+
const BrotliTrieNode* node = &trie->root;
|
207
|
+
size_t i;
|
208
|
+
for (i = 0; i < size; i++) {
|
209
|
+
node = BrotliTrieSub(trie, node, word[i]);
|
210
|
+
if (!node) return 0;
|
211
|
+
}
|
212
|
+
return node;
|
213
|
+
}
|
214
|
+
|
215
|
+
static BROTLI_BOOL BuildDictionaryLut(MemoryManager* m,
|
216
|
+
const BrotliTransforms* transforms,
|
217
|
+
BrotliEncoderDictionary* dict) {
|
218
|
+
uint32_t i;
|
219
|
+
DictWord* dict_words;
|
220
|
+
uint16_t* buckets;
|
221
|
+
DictWord** words_by_hash;
|
222
|
+
size_t* words_by_hash_size;
|
223
|
+
size_t* words_by_hash_capacity;
|
224
|
+
BrotliTrie dedup;
|
225
|
+
uint8_t word[kTransformedBufferSize];
|
226
|
+
size_t word_size;
|
227
|
+
size_t total = 0;
|
228
|
+
uint8_t l;
|
229
|
+
uint16_t idx;
|
230
|
+
|
231
|
+
BrotliTrieInit(&dedup);
|
232
|
+
|
233
|
+
words_by_hash = (DictWord**)BrotliAllocate(m,
|
234
|
+
sizeof(*words_by_hash) * NUM_HASH_BUCKETS);
|
235
|
+
words_by_hash_size = (size_t*)BrotliAllocate(m,
|
236
|
+
sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS);
|
237
|
+
words_by_hash_capacity = (size_t*)BrotliAllocate(m,
|
238
|
+
sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS);
|
239
|
+
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
|
240
|
+
memset(words_by_hash, 0, sizeof(*words_by_hash) * NUM_HASH_BUCKETS);
|
241
|
+
memset(words_by_hash_size, 0, sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS);
|
242
|
+
memset(words_by_hash_capacity, 0,
|
243
|
+
sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS);
|
244
|
+
|
245
|
+
if (transforms->num_transforms > 0) {
|
246
|
+
for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
|
247
|
+
l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) {
|
248
|
+
uint16_t n = dict->words->size_bits_by_length[l] ?
|
249
|
+
(uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u;
|
250
|
+
for (idx = 0; idx < n; ++idx) {
|
251
|
+
uint32_t key;
|
252
|
+
/* First transform (usually identity) */
|
253
|
+
TransformedDictionaryWord(idx, l, 0, transforms, dict, word,
|
254
|
+
&word_size);
|
255
|
+
/* Cannot hash words smaller than 4 bytes */
|
256
|
+
if (word_size < 4) {
|
257
|
+
/* Break instead of continue, all next words of this length will have
|
258
|
+
same length after transform */
|
259
|
+
break;
|
260
|
+
}
|
261
|
+
if (!BrotliTrieAdd(m, 0, idx, word, word_size, &dedup)) {
|
262
|
+
return BROTLI_FALSE;
|
263
|
+
}
|
264
|
+
key = Hash(word, NUM_HASH_BITS);
|
265
|
+
BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key],
|
266
|
+
words_by_hash_capacity[key], words_by_hash_size[key],
|
267
|
+
MakeDictWord(l, 0, idx));
|
268
|
+
++total;
|
269
|
+
}
|
270
|
+
}
|
271
|
+
}
|
272
|
+
|
273
|
+
/* These LUT transforms only supported if no custom transforms. This is
|
274
|
+
ok, we will use the heavy trie instead. */
|
275
|
+
if (transforms == BrotliGetTransforms()) {
|
276
|
+
for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
|
277
|
+
l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) {
|
278
|
+
uint16_t n = dict->words->size_bits_by_length[l] ?
|
279
|
+
(uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u;
|
280
|
+
for (idx = 0; idx < n; ++idx) {
|
281
|
+
int k;
|
282
|
+
BROTLI_BOOL is_ascii = BROTLI_TRUE;
|
283
|
+
size_t offset = dict->words->offsets_by_length[l] + (size_t)l * idx;
|
284
|
+
const uint8_t* data = &dict->words->data[offset];
|
285
|
+
for (k = 0; k < l; ++k) {
|
286
|
+
if (data[k] >= 128) is_ascii = BROTLI_FALSE;
|
287
|
+
}
|
288
|
+
if (data[0] < 128) {
|
289
|
+
int transform = 9; /* {empty, uppercase first, empty} */
|
290
|
+
uint32_t ix = idx + (uint32_t)transform * n;
|
291
|
+
const BrotliTrieNode* it;
|
292
|
+
TransformedDictionaryWord(idx, l, transform, transforms,
|
293
|
+
dict, word, &word_size);
|
294
|
+
it = BrotliTrieFind(&dedup, word, word_size);
|
295
|
+
if (!it || it->idx_ > ix) {
|
296
|
+
uint32_t key = Hash(word, NUM_HASH_BITS);
|
297
|
+
if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) {
|
298
|
+
return BROTLI_FALSE;
|
299
|
+
}
|
300
|
+
BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key],
|
301
|
+
words_by_hash_capacity[key], words_by_hash_size[key],
|
302
|
+
MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_FIRST, idx));
|
303
|
+
++total;
|
304
|
+
}
|
305
|
+
}
|
306
|
+
if (is_ascii) {
|
307
|
+
int transform = 44; /* {empty, uppercase all, empty} */
|
308
|
+
uint32_t ix = idx + (uint32_t)transform * n;
|
309
|
+
const BrotliTrieNode* it;
|
310
|
+
TransformedDictionaryWord(idx, l, transform, transforms,
|
311
|
+
dict, word, &word_size);
|
312
|
+
it = BrotliTrieFind(&dedup, word, word_size);
|
313
|
+
if (!it || it->idx_ > ix) {
|
314
|
+
uint32_t key = Hash(word, NUM_HASH_BITS);
|
315
|
+
if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) {
|
316
|
+
return BROTLI_FALSE;
|
317
|
+
}
|
318
|
+
BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key],
|
319
|
+
words_by_hash_capacity[key], words_by_hash_size[key],
|
320
|
+
MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_ALL, idx));
|
321
|
+
++total;
|
322
|
+
}
|
323
|
+
}
|
324
|
+
}
|
325
|
+
}
|
326
|
+
}
|
327
|
+
|
328
|
+
dict_words = (DictWord*)BrotliAllocate(m,
|
329
|
+
sizeof(*dict->dict_words) * (total + 1));
|
330
|
+
buckets = (uint16_t*)BrotliAllocate(m,
|
331
|
+
sizeof(*dict->buckets) * NUM_HASH_BUCKETS);
|
332
|
+
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
|
333
|
+
dict->dict_words_alloc_size_ = total + 1;
|
334
|
+
dict->dict_words = dict->dict_words_data_ = dict_words;
|
335
|
+
dict->buckets_alloc_size_ = NUM_HASH_BUCKETS;
|
336
|
+
dict->buckets = dict->buckets_data_ = buckets;
|
337
|
+
|
338
|
+
/* Unused; makes offsets start from 1. */
|
339
|
+
dict_words[0] = MakeDictWord(0, 0, 0);
|
340
|
+
total = 1;
|
341
|
+
for (i = 0; i < NUM_HASH_BUCKETS; ++i) {
|
342
|
+
size_t num_words = words_by_hash_size[i];
|
343
|
+
if (num_words > 0) {
|
344
|
+
buckets[i] = (uint16_t)(total);
|
345
|
+
memcpy(&dict_words[total], &words_by_hash[i][0],
|
346
|
+
sizeof(dict_words[0]) * num_words);
|
347
|
+
total += num_words;
|
348
|
+
dict_words[total - 1].len |= 0x80;
|
349
|
+
} else {
|
350
|
+
buckets[i] = 0;
|
351
|
+
}
|
352
|
+
}
|
353
|
+
|
354
|
+
for (i = 0; i < NUM_HASH_BUCKETS; ++i) {
|
355
|
+
BrotliFree(m, words_by_hash[i]);
|
356
|
+
}
|
357
|
+
BrotliFree(m, words_by_hash);
|
358
|
+
BrotliFree(m, words_by_hash_size);
|
359
|
+
BrotliFree(m, words_by_hash_capacity);
|
360
|
+
BrotliTrieFree(m, &dedup);
|
361
|
+
|
362
|
+
return BROTLI_TRUE;
|
363
|
+
}
|
364
|
+
|
365
|
+
static void BuildDictionaryHashTable(uint16_t* hash_table_words,
|
366
|
+
uint8_t* hash_table_lengths, const BrotliDictionary* dict) {
|
367
|
+
int j, len;
|
368
|
+
/* The order of the loops is such that in case of collision, words with
|
369
|
+
shorter length are preferred, and in case of same length, words with
|
370
|
+
smaller index. There is only a single word per bucket. */
|
371
|
+
/* TODO(lode): consider adding optional user-supplied frequency_map to use
|
372
|
+
for preferred words instead, this can make the encoder better for
|
373
|
+
quality 9 and below without affecting the decoder */
|
374
|
+
memset(hash_table_words, 0, sizeof(kStaticDictionaryHashWords));
|
375
|
+
memset(hash_table_lengths, 0, sizeof(kStaticDictionaryHashLengths));
|
376
|
+
for (len = SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH;
|
377
|
+
len >= SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; --len) {
|
378
|
+
const size_t num_words = dict->size_bits_by_length[len] ?
|
379
|
+
(1u << dict->size_bits_by_length[len]) : 0;
|
380
|
+
for (j = (int)num_words - 1; j >= 0; --j) {
|
381
|
+
size_t offset = dict->offsets_by_length[len] +
|
382
|
+
(size_t)len * (size_t)j;
|
383
|
+
const uint8_t* word = &dict->data[offset];
|
384
|
+
const uint32_t key = Hash(word, 14);
|
385
|
+
int idx = (int)(key << 1) + (len < 8 ? 1 : 0);
|
386
|
+
BROTLI_DCHECK(idx < (int)NUM_HASH_BUCKETS);
|
387
|
+
hash_table_words[idx] = (uint16_t)j;
|
388
|
+
hash_table_lengths[idx] = (uint8_t)len;
|
389
|
+
}
|
390
|
+
}
|
29
391
|
}
|
30
392
|
|
393
|
+
static BROTLI_BOOL GenerateWordsHeavy(MemoryManager* m,
|
394
|
+
const BrotliTransforms* transforms,
|
395
|
+
BrotliEncoderDictionary* dict) {
|
396
|
+
int i, j, l;
|
397
|
+
for (j = (int)transforms->num_transforms - 1; j >= 0 ; --j) {
|
398
|
+
for (l = 0; l < 32; l++) {
|
399
|
+
int num = (int)((1u << dict->words->size_bits_by_length[l]) & ~1u);
|
400
|
+
for (i = 0; i < num; i++) {
|
401
|
+
uint8_t transformed[kTransformedBufferSize];
|
402
|
+
size_t size;
|
403
|
+
TransformedDictionaryWord(
|
404
|
+
(uint32_t)i, l, j, transforms, dict, transformed, &size);
|
405
|
+
if (size < 4) continue;
|
406
|
+
if (!BrotliTrieAdd(m, (uint8_t)l, (uint32_t)(i + num * j),
|
407
|
+
transformed, size, &dict->trie)) {
|
408
|
+
return BROTLI_FALSE;
|
409
|
+
}
|
410
|
+
}
|
411
|
+
}
|
412
|
+
}
|
413
|
+
return BROTLI_TRUE;
|
414
|
+
}
|
415
|
+
|
416
|
+
/* Computes cutoffTransformsCount (in count) and cutoffTransforms (in data) for
|
417
|
+
the custom transforms, where possible within the limits of the
|
418
|
+
cutoffTransforms encoding. The fast encoder uses this to do fast lookup for
|
419
|
+
transforms that remove the N last characters (OmitLast). */
|
420
|
+
static void ComputeCutoffTransforms(
|
421
|
+
const BrotliTransforms* transforms,
|
422
|
+
uint32_t* count, uint64_t* data) {
|
423
|
+
int i;
|
424
|
+
/* The encoding in a 64-bit integer of transform N in the data is: (N << 2) +
|
425
|
+
((cutoffTransforms >> (N * 6)) & 0x3F), so for example the identity
|
426
|
+
transform code must be 0-63, for N=1 the transform code must be 4-67, ...,
|
427
|
+
for N=9 it must be 36-99.
|
428
|
+
TODO(lode): consider a simple flexible uint8_t[10] instead of the uint64_t
|
429
|
+
for the cutoff transforms, so that shared dictionaries can have the
|
430
|
+
OmitLast transforms anywhere without loss. */
|
431
|
+
*count = 0;
|
432
|
+
*data = 0;
|
433
|
+
for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) {
|
434
|
+
int idx = transforms->cutOffTransforms[i];
|
435
|
+
if (idx == -1) break; /* Not found */
|
436
|
+
if (idx < (i << 2)) break; /* Too small for the encoding */
|
437
|
+
if (idx >= (i << 2) + 64) break; /* Too large for the encoding */
|
438
|
+
(*count)++;
|
439
|
+
*data |= (uint64_t)(((uint64_t)idx -
|
440
|
+
((uint64_t)i << 2u)) << ((uint64_t)i * 6u));
|
441
|
+
}
|
442
|
+
}
|
443
|
+
|
444
|
+
static BROTLI_BOOL ComputeDictionary(MemoryManager* m, int quality,
|
445
|
+
const BrotliTransforms* transforms,
|
446
|
+
BrotliEncoderDictionary* current) {
|
447
|
+
int default_words = current->words == BrotliGetDictionary();
|
448
|
+
int default_transforms = transforms == BrotliGetTransforms();
|
449
|
+
|
450
|
+
if (default_words && default_transforms) {
|
451
|
+
/* hashes are already set to Brotli defaults */
|
452
|
+
return BROTLI_TRUE;
|
453
|
+
}
|
454
|
+
|
455
|
+
current->hash_table_data_words_ = (uint16_t*)BrotliAllocate(
|
456
|
+
m, sizeof(kStaticDictionaryHashWords));
|
457
|
+
current->hash_table_data_lengths_ = (uint8_t*)BrotliAllocate(
|
458
|
+
m, sizeof(kStaticDictionaryHashLengths));
|
459
|
+
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
|
460
|
+
current->hash_table_words = current->hash_table_data_words_;
|
461
|
+
current->hash_table_lengths = current->hash_table_data_lengths_;
|
462
|
+
|
463
|
+
BuildDictionaryHashTable(current->hash_table_data_words_,
|
464
|
+
current->hash_table_data_lengths_, current->words);
|
465
|
+
|
466
|
+
ComputeCutoffTransforms(transforms,
|
467
|
+
¤t->cutoffTransformsCount, ¤t->cutoffTransforms);
|
468
|
+
|
469
|
+
/* Only compute the data for slow encoder if the requested quality is high
|
470
|
+
enough to need it */
|
471
|
+
if (quality >= ZOPFLIFICATION_QUALITY) {
|
472
|
+
if (!BuildDictionaryLut(m, transforms, current)) return BROTLI_FALSE;
|
473
|
+
|
474
|
+
/* For the built-in Brotli transforms, there is a hard-coded function to
|
475
|
+
handle all transforms, but for custom transforms, we use the following
|
476
|
+
large hammer instead */
|
477
|
+
current->has_words_heavy = !default_transforms;
|
478
|
+
if (current->has_words_heavy) {
|
479
|
+
if (!GenerateWordsHeavy(m, transforms, current)) return BROTLI_FALSE;
|
480
|
+
}
|
481
|
+
}
|
482
|
+
|
483
|
+
return BROTLI_TRUE;
|
484
|
+
}
|
485
|
+
#endif /* BROTLI_EXPERIMENTAL */
|
486
|
+
|
487
|
+
void BrotliInitSharedEncoderDictionary(SharedEncoderDictionary* dict) {
|
488
|
+
dict->magic = kSharedDictionaryMagic;
|
489
|
+
|
490
|
+
dict->compound.num_chunks = 0;
|
491
|
+
dict->compound.total_size = 0;
|
492
|
+
dict->compound.chunk_offsets[0] = 0;
|
493
|
+
dict->compound.num_prepared_instances_ = 0;
|
494
|
+
|
495
|
+
dict->contextual.context_based = 0;
|
496
|
+
dict->contextual.num_dictionaries = 1;
|
497
|
+
dict->contextual.instances_ = 0;
|
498
|
+
dict->contextual.num_instances_ = 1; /* The instance_ field */
|
499
|
+
dict->contextual.dict[0] = &dict->contextual.instance_;
|
500
|
+
InitEncoderDictionary(&dict->contextual.instance_);
|
501
|
+
dict->contextual.instance_.parent = &dict->contextual;
|
502
|
+
|
503
|
+
dict->max_quality = BROTLI_MAX_QUALITY;
|
504
|
+
}
|
505
|
+
|
506
|
+
#if defined(BROTLI_EXPERIMENTAL)
|
507
|
+
/* TODO(eustas): make sure that tooling will warn user if not all the cutoff
|
508
|
+
transforms are available (for low-quality encoder). */
|
509
|
+
static BROTLI_BOOL InitCustomSharedEncoderDictionary(
|
510
|
+
MemoryManager* m, const BrotliSharedDictionary* decoded_dict,
|
511
|
+
int quality, SharedEncoderDictionary* dict) {
|
512
|
+
ContextualEncoderDictionary* contextual;
|
513
|
+
CompoundDictionary* compound;
|
514
|
+
BrotliEncoderDictionary* instances;
|
515
|
+
int i;
|
516
|
+
BrotliInitSharedEncoderDictionary(dict);
|
517
|
+
|
518
|
+
contextual = &dict->contextual;
|
519
|
+
compound = &dict->compound;
|
520
|
+
|
521
|
+
for (i = 0; i < (int)decoded_dict->num_prefix; i++) {
|
522
|
+
PreparedDictionary* prepared = CreatePreparedDictionary(m,
|
523
|
+
decoded_dict->prefix[i], decoded_dict->prefix_size[i]);
|
524
|
+
AttachPreparedDictionary(compound, prepared);
|
525
|
+
/* remember for cleanup */
|
526
|
+
compound->prepared_instances_[
|
527
|
+
compound->num_prepared_instances_++] = prepared;
|
528
|
+
}
|
529
|
+
|
530
|
+
dict->max_quality = quality;
|
531
|
+
contextual->context_based = decoded_dict->context_based;
|
532
|
+
if (decoded_dict->context_based) {
|
533
|
+
memcpy(contextual->context_map, decoded_dict->context_map,
|
534
|
+
SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS);
|
535
|
+
}
|
536
|
+
|
537
|
+
contextual->num_dictionaries = decoded_dict->num_dictionaries;
|
538
|
+
contextual->num_instances_ = decoded_dict->num_dictionaries;
|
539
|
+
if (contextual->num_instances_ == 1) {
|
540
|
+
instances = &contextual->instance_;
|
541
|
+
} else {
|
542
|
+
contextual->instances_ = (BrotliEncoderDictionary*)
|
543
|
+
BrotliAllocate(m, sizeof(*contextual->instances_) *
|
544
|
+
contextual->num_instances_);
|
545
|
+
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
|
546
|
+
instances = contextual->instances_;
|
547
|
+
}
|
548
|
+
for (i = 0; i < (int)contextual->num_instances_; i++) {
|
549
|
+
BrotliEncoderDictionary* current = &instances[i];
|
550
|
+
InitEncoderDictionary(current);
|
551
|
+
current->parent = &dict->contextual;
|
552
|
+
if (decoded_dict->words[i] == BrotliGetDictionary()) {
|
553
|
+
current->words = BrotliGetDictionary();
|
554
|
+
} else {
|
555
|
+
current->words_instance_ = (BrotliDictionary*)BrotliAllocate(
|
556
|
+
m, sizeof(BrotliDictionary));
|
557
|
+
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
|
558
|
+
*current->words_instance_ = *decoded_dict->words[i];
|
559
|
+
current->words = current->words_instance_;
|
560
|
+
}
|
561
|
+
current->num_transforms =
|
562
|
+
(uint32_t)decoded_dict->transforms[i]->num_transforms;
|
563
|
+
if (!ComputeDictionary(
|
564
|
+
m, quality, decoded_dict->transforms[i], current)) {
|
565
|
+
return BROTLI_FALSE;
|
566
|
+
}
|
567
|
+
|
568
|
+
contextual->dict[i] = current;
|
569
|
+
}
|
570
|
+
|
571
|
+
return BROTLI_TRUE; /* success */
|
572
|
+
}
|
573
|
+
|
574
|
+
BROTLI_BOOL BrotliInitCustomSharedEncoderDictionary(
|
575
|
+
MemoryManager* m, const uint8_t* encoded_dict, size_t size,
|
576
|
+
int quality, SharedEncoderDictionary* dict) {
|
577
|
+
BROTLI_BOOL success = BROTLI_FALSE;
|
578
|
+
BrotliSharedDictionary* decoded_dict = BrotliSharedDictionaryCreateInstance(
|
579
|
+
m->alloc_func, m->free_func, m->opaque);
|
580
|
+
if (!decoded_dict) { /* OOM */
|
581
|
+
return BROTLI_FALSE;
|
582
|
+
}
|
583
|
+
success = BrotliSharedDictionaryAttach(
|
584
|
+
decoded_dict, BROTLI_SHARED_DICTIONARY_SERIALIZED, size, encoded_dict);
|
585
|
+
if (success) {
|
586
|
+
success = InitCustomSharedEncoderDictionary(m,
|
587
|
+
decoded_dict, quality, dict);
|
588
|
+
}
|
589
|
+
BrotliSharedDictionaryDestroyInstance(decoded_dict);
|
590
|
+
return success;
|
591
|
+
}
|
592
|
+
#endif /* BROTLI_EXPERIMENTAL */
|
593
|
+
|
594
|
+
void BrotliCleanupSharedEncoderDictionary(MemoryManager* m,
|
595
|
+
SharedEncoderDictionary* dict) {
|
596
|
+
size_t i;
|
597
|
+
for (i = 0; i < dict->compound.num_prepared_instances_; i++) {
|
598
|
+
DestroyPreparedDictionary(m,
|
599
|
+
(PreparedDictionary*)dict->compound.prepared_instances_[i]);
|
600
|
+
}
|
601
|
+
if (dict->contextual.num_instances_ == 1) {
|
602
|
+
BrotliDestroyEncoderDictionary(m, &dict->contextual.instance_);
|
603
|
+
} else if (dict->contextual.num_instances_ > 1) {
|
604
|
+
for (i = 0; i < dict->contextual.num_instances_; i++) {
|
605
|
+
BrotliDestroyEncoderDictionary(m, &dict->contextual.instances_[i]);
|
606
|
+
}
|
607
|
+
BrotliFree(m, dict->contextual.instances_);
|
608
|
+
}
|
609
|
+
}
|
610
|
+
|
611
|
+
ManagedDictionary* BrotliCreateManagedDictionary(
|
612
|
+
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
|
613
|
+
ManagedDictionary* result = (ManagedDictionary*)BrotliBootstrapAlloc(
|
614
|
+
sizeof(ManagedDictionary), alloc_func, free_func, opaque);
|
615
|
+
if (result == NULL) return NULL;
|
616
|
+
|
617
|
+
result->magic = kManagedDictionaryMagic;
|
618
|
+
BrotliInitMemoryManager(
|
619
|
+
&result->memory_manager_, alloc_func, free_func, opaque);
|
620
|
+
result->dictionary = NULL;
|
621
|
+
|
622
|
+
return result;
|
623
|
+
}
|
624
|
+
|
625
|
+
void BrotliDestroyManagedDictionary(ManagedDictionary* dictionary) {
|
626
|
+
if (!dictionary) return;
|
627
|
+
BrotliBootstrapFree(dictionary, &dictionary->memory_manager_);
|
628
|
+
}
|
629
|
+
|
630
|
+
/* Escalate internal functions visibility; for testing purposes only. */
|
631
|
+
#if defined(BROTLI_TEST)
|
632
|
+
void InitEncoderDictionaryForTest(BrotliEncoderDictionary*);
|
633
|
+
void InitEncoderDictionaryForTest(BrotliEncoderDictionary* d) {
|
634
|
+
InitEncoderDictionary(d);
|
635
|
+
}
|
636
|
+
#endif
|
637
|
+
|
31
638
|
#if defined(__cplusplus) || defined(c_plusplus)
|
32
639
|
} /* extern "C" */
|
33
640
|
#endif
|