brotli 0.1.8 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.travis.yml +7 -3
  4. data/brotli.gemspec +1 -1
  5. data/ext/brotli/brotli.c +4 -4
  6. data/ext/brotli/brotli.h +2 -2
  7. data/ext/brotli/extconf.rb +9 -16
  8. data/lib/brotli/version.rb +1 -1
  9. data/vendor/brotli/{common → c/common}/constants.h +11 -1
  10. data/vendor/brotli/c/common/dictionary.bin +432 -0
  11. data/vendor/brotli/c/common/dictionary.c +5905 -0
  12. data/vendor/brotli/c/common/dictionary.h +64 -0
  13. data/vendor/brotli/c/common/version.h +19 -0
  14. data/vendor/brotli/{dec → c/dec}/bit_reader.c +2 -2
  15. data/vendor/brotli/{dec → c/dec}/bit_reader.h +11 -34
  16. data/vendor/brotli/{dec → c/dec}/context.h +1 -1
  17. data/vendor/brotli/{dec → c/dec}/decode.c +389 -356
  18. data/vendor/brotli/{dec → c/dec}/huffman.c +24 -23
  19. data/vendor/brotli/{dec → c/dec}/huffman.h +1 -1
  20. data/vendor/brotli/{dec → c/dec}/port.h +19 -10
  21. data/vendor/brotli/{dec → c/dec}/prefix.h +1 -1
  22. data/vendor/brotli/{dec → c/dec}/state.c +23 -19
  23. data/vendor/brotli/{dec → c/dec}/state.h +18 -17
  24. data/vendor/brotli/{dec → c/dec}/transform.h +2 -2
  25. data/vendor/brotli/c/enc/backward_references.c +134 -0
  26. data/vendor/brotli/c/enc/backward_references.h +39 -0
  27. data/vendor/brotli/{enc/backward_references.c → c/enc/backward_references_hq.c} +144 -232
  28. data/vendor/brotli/{enc/backward_references.h → c/enc/backward_references_hq.h} +28 -31
  29. data/vendor/brotli/{enc → c/enc}/backward_references_inc.h +37 -31
  30. data/vendor/brotli/{enc → c/enc}/bit_cost.c +1 -1
  31. data/vendor/brotli/{enc → c/enc}/bit_cost.h +1 -1
  32. data/vendor/brotli/{enc → c/enc}/bit_cost_inc.h +0 -0
  33. data/vendor/brotli/{enc → c/enc}/block_encoder_inc.h +0 -0
  34. data/vendor/brotli/{enc → c/enc}/block_splitter.c +2 -4
  35. data/vendor/brotli/{enc → c/enc}/block_splitter.h +1 -1
  36. data/vendor/brotli/{enc → c/enc}/block_splitter_inc.h +6 -7
  37. data/vendor/brotli/{enc → c/enc}/brotli_bit_stream.c +22 -26
  38. data/vendor/brotli/{enc → c/enc}/brotli_bit_stream.h +1 -5
  39. data/vendor/brotli/{enc → c/enc}/cluster.c +1 -1
  40. data/vendor/brotli/{enc → c/enc}/cluster.h +1 -1
  41. data/vendor/brotli/{enc → c/enc}/cluster_inc.h +2 -0
  42. data/vendor/brotli/{enc → c/enc}/command.h +34 -17
  43. data/vendor/brotli/{enc → c/enc}/compress_fragment.c +97 -53
  44. data/vendor/brotli/{enc → c/enc}/compress_fragment.h +5 -2
  45. data/vendor/brotli/{enc → c/enc}/compress_fragment_two_pass.c +106 -51
  46. data/vendor/brotli/{enc → c/enc}/compress_fragment_two_pass.h +5 -2
  47. data/vendor/brotli/{enc → c/enc}/context.h +3 -3
  48. data/vendor/brotli/c/enc/dictionary_hash.c +1120 -0
  49. data/vendor/brotli/c/enc/dictionary_hash.h +24 -0
  50. data/vendor/brotli/{enc → c/enc}/encode.c +442 -240
  51. data/vendor/brotli/{enc → c/enc}/entropy_encode.c +9 -9
  52. data/vendor/brotli/{enc → c/enc}/entropy_encode.h +4 -4
  53. data/vendor/brotli/{enc → c/enc}/entropy_encode_static.h +4 -4
  54. data/vendor/brotli/{enc → c/enc}/fast_log.h +3 -3
  55. data/vendor/brotli/{enc → c/enc}/find_match_length.h +8 -8
  56. data/vendor/brotli/c/enc/hash.h +446 -0
  57. data/vendor/brotli/{enc → c/enc}/hash_forgetful_chain_inc.h +72 -68
  58. data/vendor/brotli/c/enc/hash_longest_match64_inc.h +266 -0
  59. data/vendor/brotli/c/enc/hash_longest_match_inc.h +258 -0
  60. data/vendor/brotli/{enc → c/enc}/hash_longest_match_quickly_inc.h +81 -77
  61. data/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +326 -0
  62. data/vendor/brotli/{enc → c/enc}/histogram.c +4 -2
  63. data/vendor/brotli/{enc → c/enc}/histogram.h +1 -1
  64. data/vendor/brotli/{enc → c/enc}/histogram_inc.h +0 -0
  65. data/vendor/brotli/{enc → c/enc}/literal_cost.c +4 -7
  66. data/vendor/brotli/{enc → c/enc}/literal_cost.h +2 -2
  67. data/vendor/brotli/{enc → c/enc}/memory.c +1 -1
  68. data/vendor/brotli/{enc → c/enc}/memory.h +3 -2
  69. data/vendor/brotli/{enc → c/enc}/metablock.c +136 -123
  70. data/vendor/brotli/{enc → c/enc}/metablock.h +2 -12
  71. data/vendor/brotli/{enc → c/enc}/metablock_inc.h +0 -0
  72. data/vendor/brotli/{enc → c/enc}/port.h +49 -33
  73. data/vendor/brotli/{enc → c/enc}/prefix.h +4 -2
  74. data/vendor/brotli/{enc → c/enc}/quality.h +47 -17
  75. data/vendor/brotli/{enc → c/enc}/ringbuffer.h +6 -6
  76. data/vendor/brotli/{enc → c/enc}/static_dict.c +26 -22
  77. data/vendor/brotli/{enc → c/enc}/static_dict.h +3 -1
  78. data/vendor/brotli/c/enc/static_dict_lut.h +5864 -0
  79. data/vendor/brotli/{enc → c/enc}/utf8_util.c +1 -1
  80. data/vendor/brotli/{enc → c/enc}/utf8_util.h +2 -2
  81. data/vendor/brotli/{enc → c/enc}/write_bits.h +3 -3
  82. data/vendor/brotli/c/include/brotli/decode.h +339 -0
  83. data/vendor/brotli/c/include/brotli/encode.h +402 -0
  84. data/vendor/brotli/c/include/brotli/port.h +146 -0
  85. data/vendor/brotli/c/include/brotli/types.h +90 -0
  86. metadata +80 -79
  87. data/vendor/brotli/common/dictionary.c +0 -9474
  88. data/vendor/brotli/common/dictionary.h +0 -29
  89. data/vendor/brotli/common/port.h +0 -107
  90. data/vendor/brotli/common/types.h +0 -58
  91. data/vendor/brotli/dec/decode.h +0 -188
  92. data/vendor/brotli/enc/compressor.cc +0 -139
  93. data/vendor/brotli/enc/compressor.h +0 -161
  94. data/vendor/brotli/enc/dictionary_hash.h +0 -4121
  95. data/vendor/brotli/enc/encode.h +0 -221
  96. data/vendor/brotli/enc/encode_parallel.cc +0 -289
  97. data/vendor/brotli/enc/encode_parallel.h +0 -27
  98. data/vendor/brotli/enc/hash.h +0 -717
  99. data/vendor/brotli/enc/hash_longest_match_inc.h +0 -241
  100. data/vendor/brotli/enc/static_dict_lut.h +0 -11241
  101. data/vendor/brotli/enc/streams.cc +0 -114
  102. data/vendor/brotli/enc/streams.h +0 -121
@@ -1,27 +0,0 @@ data/vendor/brotli/enc/encode_parallel.h (file removed)
1
- /* Copyright 2013 Google Inc. All Rights Reserved.
2
-
3
- Distributed under MIT license.
4
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
- */
6
-
7
- /* API for parallel Brotli compression
8
- Note that this is only a proof of concept currently and not part of the
9
- final API yet. */
10
-
11
- #ifndef BROTLI_ENC_ENCODE_PARALLEL_H_
12
- #define BROTLI_ENC_ENCODE_PARALLEL_H_
13
-
14
- #include "../common/types.h"
15
- #include "./compressor.h"
16
-
17
- namespace brotli {
18
-
19
- int BrotliCompressBufferParallel(BrotliParams params,
20
- size_t input_size,
21
- const uint8_t* input_buffer,
22
- size_t* encoded_size,
23
- uint8_t* encoded_buffer);
24
-
25
- } /* namespace brotli */
26
-
27
- #endif /* BROTLI_ENC_ENCODE_PARALLEL_H_ */
@@ -1,717 +0,0 @@ data/vendor/brotli/enc/hash.h (file removed)
1
- /* Copyright 2010 Google Inc. All Rights Reserved.
2
-
3
- Distributed under MIT license.
4
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
- */
6
-
7
- /* A (forgetful) hash table to the data seen by the compressor, to
8
- help create backward references to previous data. */
9
-
10
- #ifndef BROTLI_ENC_HASH_H_
11
- #define BROTLI_ENC_HASH_H_
12
-
13
- #include <string.h> /* memcmp, memset */
14
-
15
- #include "../common/constants.h"
16
- #include "../common/dictionary.h"
17
- #include "../common/types.h"
18
- #include "./dictionary_hash.h"
19
- #include "./fast_log.h"
20
- #include "./find_match_length.h"
21
- #include "./memory.h"
22
- #include "./port.h"
23
- #include "./quality.h"
24
- #include "./static_dict.h"
25
-
26
- #if defined(__cplusplus) || defined(c_plusplus)
27
- extern "C" {
28
- #endif
29
-
30
- #define MAX_TREE_SEARCH_DEPTH 64
31
- #define MAX_TREE_COMP_LENGTH 128
32
- #define score_t size_t
33
-
34
- static const uint32_t kDistanceCacheIndex[] = {
35
- 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
36
- };
37
- static const int kDistanceCacheOffset[] = {
38
- 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
39
- };
40
-
41
- static const uint32_t kCutoffTransformsCount = 10;
42
- static const uint8_t kCutoffTransforms[] = {
43
- 0, 12, 27, 23, 42, 63, 56, 48, 59, 64
44
- };
45
-
46
- typedef struct HasherSearchResult {
47
- size_t len;
48
- size_t len_x_code; /* == len ^ len_code */
49
- size_t distance;
50
- score_t score;
51
- } HasherSearchResult;
52
-
53
- typedef struct DictionarySearchStatictics {
54
- size_t num_lookups;
55
- size_t num_matches;
56
- } DictionarySearchStatictics;
57
-
58
- /* kHashMul32 multiplier has these properties:
59
- * The multiplier must be odd. Otherwise we may lose the highest bit.
60
- * No long streaks of 1s or 0s.
61
- * There is no effort to ensure that it is a prime, the oddity is enough
62
- for this use.
63
- * The number has been tuned heuristically against compression benchmarks. */
64
- static const uint32_t kHashMul32 = 0x1e35a7bd;
65
-
66
- static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
67
- uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
68
- /* The higher bits contain more mixture from the multiplication,
69
- so we take our results from there. */
70
- return h >> (32 - 14);
71
- }
72
-
73
- #define BROTLI_LITERAL_BYTE_SCORE 540
74
- #define BROTLI_DISTANCE_BIT_PENALTY 120
75
- /* Score must be positive after applying maximal penalty. */
76
- #define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))
77
-
78
- /* Usually, we always choose the longest backward reference. This function
79
- allows for the exception of that rule.
80
-
81
- If we choose a backward reference that is further away, it will
82
- usually be coded with more bits. We approximate this by assuming
83
- log2(distance). If the distance can be expressed in terms of the
84
- last four distances, we use some heuristic constants to estimate
85
- the bits cost. For the first up to four literals we use the bit
86
- cost of the literals from the literal cost model, after that we
87
- use the average bit cost of the cost model.
88
-
89
- This function is used to sometimes discard a longer backward reference
90
- when it is not much longer and the bit cost for encoding it is more
91
- than the saved literals.
92
-
93
- backward_reference_offset MUST be positive. */
94
- static BROTLI_INLINE score_t BackwardReferenceScore(
95
- size_t copy_length, size_t backward_reference_offset) {
96
- return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length -
97
- BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);
98
- }
99
-
100
- static const score_t kDistanceShortCodeCost[BROTLI_NUM_DISTANCE_SHORT_CODES] = {
101
- /* Repeat last */
102
- BROTLI_SCORE_BASE + 60,
103
- /* 2nd, 3rd, 4th last */
104
- BROTLI_SCORE_BASE - 95,
105
- BROTLI_SCORE_BASE - 117,
106
- BROTLI_SCORE_BASE - 127,
107
- /* Last with offset */
108
- BROTLI_SCORE_BASE - 93,
109
- BROTLI_SCORE_BASE - 93,
110
- BROTLI_SCORE_BASE - 96,
111
- BROTLI_SCORE_BASE - 96,
112
- BROTLI_SCORE_BASE - 99,
113
- BROTLI_SCORE_BASE - 99,
114
- /* 2nd last with offset */
115
- BROTLI_SCORE_BASE - 105,
116
- BROTLI_SCORE_BASE - 105,
117
- BROTLI_SCORE_BASE - 115,
118
- BROTLI_SCORE_BASE - 115,
119
- BROTLI_SCORE_BASE - 125,
120
- BROTLI_SCORE_BASE - 125
121
- };
122
-
123
- static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(
124
- size_t copy_length, size_t distance_short_code) {
125
- return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length +
126
- kDistanceShortCodeCost[distance_short_code];
127
- }
128
-
129
- static BROTLI_INLINE void DictionarySearchStaticticsReset(
130
- DictionarySearchStatictics* self) {
131
- self->num_lookups = 0;
132
- self->num_matches = 0;
133
- }
134
-
135
- static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
136
- size_t item, const uint8_t* data, size_t max_length, size_t max_backward,
137
- HasherSearchResult* out) {
138
- size_t len;
139
- size_t dist;
140
- size_t offset;
141
- size_t matchlen;
142
- size_t backward;
143
- score_t score;
144
- len = item & 31;
145
- dist = item >> 5;
146
- offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
147
- if (len > max_length) {
148
- return BROTLI_FALSE;
149
- }
150
-
151
- matchlen = FindMatchLengthWithLimit(data, &kBrotliDictionary[offset], len);
152
- if (matchlen + kCutoffTransformsCount <= len || matchlen == 0) {
153
- return BROTLI_FALSE;
154
- }
155
- {
156
- size_t transform_id = kCutoffTransforms[len - matchlen];
157
- backward = max_backward + dist + 1 +
158
- (transform_id << kBrotliDictionarySizeBitsByLength[len]);
159
- }
160
- score = BackwardReferenceScore(matchlen, backward);
161
- if (score < out->score) {
162
- return BROTLI_FALSE;
163
- }
164
- out->len = matchlen;
165
- out->len_x_code = len ^ matchlen;
166
- out->distance = backward;
167
- out->score = score;
168
- return BROTLI_TRUE;
169
- }
170
-
171
- static BROTLI_INLINE BROTLI_BOOL SearchInStaticDictionary(
172
- DictionarySearchStatictics* self, const uint8_t* data, size_t max_length,
173
- size_t max_backward, HasherSearchResult* out, BROTLI_BOOL shallow) {
174
- size_t key;
175
- size_t i;
176
- BROTLI_BOOL is_match_found = BROTLI_FALSE;
177
- if (self->num_matches < (self->num_lookups >> 7)) {
178
- return BROTLI_FALSE;
179
- }
180
- key = Hash14(data) << 1;
181
- for (i = 0; i < (shallow ? 1 : 2); ++i, ++key) {
182
- size_t item = kStaticDictionaryHash[key];
183
- self->num_lookups++;
184
- if (item != 0 &&
185
- TestStaticDictionaryItem(item, data, max_length, max_backward, out)) {
186
- self->num_matches++;
187
- is_match_found = BROTLI_TRUE;
188
- }
189
- }
190
- return is_match_found;
191
- }
192
-
193
- typedef struct BackwardMatch {
194
- uint32_t distance;
195
- uint32_t length_and_code;
196
- } BackwardMatch;
197
-
198
- static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,
199
- size_t dist, size_t len) {
200
- self->distance = (uint32_t)dist;
201
- self->length_and_code = (uint32_t)(len << 5);
202
- }
203
-
204
- static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,
205
- size_t dist, size_t len, size_t len_code) {
206
- self->distance = (uint32_t)dist;
207
- self->length_and_code =
208
- (uint32_t)((len << 5) | (len == len_code ? 0 : len_code));
209
- }
210
-
211
- static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {
212
- return self->length_and_code >> 5;
213
- }
214
-
215
- static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
216
- size_t code = self->length_and_code & 31;
217
- return code ? code : BackwardMatchLength(self);
218
- }
219
-
220
- #define EXPAND_CAT(a, b) CAT(a, b)
221
- #define CAT(a, b) a ## b
222
- #define FN(X) EXPAND_CAT(X, HASHER())
223
-
224
- #define MAX_NUM_MATCHES_H10 (64 + MAX_TREE_SEARCH_DEPTH)
225
-
226
- #define HASHER() H10
227
- #define HashToBinaryTree HASHER()
228
-
229
- #define BUCKET_BITS 17
230
- #define BUCKET_SIZE (1 << BUCKET_BITS)
231
-
232
- static size_t FN(HashTypeLength)(void) { return 4; }
233
- static size_t FN(StoreLookahead)(void) { return MAX_TREE_COMP_LENGTH; }
234
-
235
- static uint32_t FN(HashBytes)(const uint8_t *data) {
236
- uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
237
- /* The higher bits contain more mixture from the multiplication,
238
- so we take our results from there. */
239
- return h >> (32 - BUCKET_BITS);
240
- }
241
-
242
- /* A (forgetful) hash table where each hash bucket contains a binary tree of
243
- sequences whose first 4 bytes share the same hash code.
244
- Each sequence is MAX_TREE_COMP_LENGTH long and is identified by its starting
245
- position in the input data. The binary tree is sorted by the lexicographic
246
- order of the sequences, and it is also a max-heap with respect to the
247
- starting positions. */
248
- typedef struct HashToBinaryTree {
249
- /* The window size minus 1 */
250
- size_t window_mask_;
251
-
252
- /* Hash table that maps the 4-byte hashes of the sequence to the last
253
- position where this hash was found, which is the root of the binary
254
- tree of sequences that share this hash bucket. */
255
- uint32_t buckets_[BUCKET_SIZE];
256
-
257
- /* The union of the binary trees of each hash bucket. The root of the tree
258
- corresponding to a hash is a sequence starting at buckets_[hash] and
259
- the left and right children of a sequence starting at pos are
260
- forest_[2 * pos] and forest_[2 * pos + 1]. */
261
- uint32_t* forest_;
262
-
263
- /* A position used to mark a non-existent sequence, i.e. a tree is empty if
264
- its root is at invalid_pos_ and a node is a leaf if both its children
265
- are at invalid_pos_. */
266
- uint32_t invalid_pos_;
267
-
268
- size_t forest_size_;
269
- BROTLI_BOOL is_dirty_;
270
- } HashToBinaryTree;
271
-
272
- static void FN(Reset)(HashToBinaryTree* self) {
273
- self->is_dirty_ = BROTLI_TRUE;
274
- }
275
-
276
- static void FN(Initialize)(HashToBinaryTree* self) {
277
- self->forest_ = NULL;
278
- self->forest_size_ = 0;
279
- FN(Reset)(self);
280
- }
281
-
282
- static void FN(Cleanup)(MemoryManager* m, HashToBinaryTree* self) {
283
- BROTLI_FREE(m, self->forest_);
284
- }
285
-
286
- static void FN(Init)(
287
- MemoryManager* m, HashToBinaryTree* self, const uint8_t* data,
288
- const BrotliEncoderParams* params, size_t position, size_t bytes,
289
- BROTLI_BOOL is_last) {
290
- if (self->is_dirty_) {
291
- uint32_t invalid_pos;
292
- size_t num_nodes;
293
- uint32_t i;
294
- BROTLI_UNUSED(data);
295
- self->window_mask_ = (1u << params->lgwin) - 1u;
296
- invalid_pos = (uint32_t)(0 - self->window_mask_);
297
- self->invalid_pos_ = invalid_pos;
298
- for (i = 0; i < BUCKET_SIZE; i++) {
299
- self->buckets_[i] = invalid_pos;
300
- }
301
- num_nodes = (position == 0 && is_last) ? bytes : self->window_mask_ + 1;
302
- if (num_nodes > self->forest_size_) {
303
- BROTLI_FREE(m, self->forest_);
304
- self->forest_ = BROTLI_ALLOC(m, uint32_t, 2 * num_nodes);
305
- if (BROTLI_IS_OOM(m)) return;
306
- self->forest_size_ = num_nodes;
307
- }
308
- self->is_dirty_ = BROTLI_FALSE;
309
- }
310
- }
311
-
312
- static BROTLI_INLINE size_t FN(LeftChildIndex)(HashToBinaryTree* self,
313
- const size_t pos) {
314
- return 2 * (pos & self->window_mask_);
315
- }
316
-
317
- static BROTLI_INLINE size_t FN(RightChildIndex)(HashToBinaryTree* self,
318
- const size_t pos) {
319
- return 2 * (pos & self->window_mask_) + 1;
320
- }
321
-
322
- /* Stores the hash of the next 4 bytes and in a single tree-traversal, the
323
- hash bucket's binary tree is searched for matches and is re-rooted at the
324
- current position.
325
-
326
- If less than MAX_TREE_COMP_LENGTH data is available, the hash bucket of the
327
- current position is searched for matches, but the state of the hash table
328
- is not changed, since we can not know the final sorting order of the
329
- current (incomplete) sequence.
330
-
331
- This function must be called with increasing cur_ix positions. */
332
- static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
333
- HashToBinaryTree* self, const uint8_t* const BROTLI_RESTRICT data,
334
- const size_t cur_ix, const size_t ring_buffer_mask, const size_t max_length,
335
- const size_t max_backward, size_t* const BROTLI_RESTRICT best_len,
336
- BackwardMatch* BROTLI_RESTRICT matches) {
337
- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
338
- const size_t max_comp_len =
339
- BROTLI_MIN(size_t, max_length, MAX_TREE_COMP_LENGTH);
340
- const BROTLI_BOOL should_reroot_tree =
341
- TO_BROTLI_BOOL(max_length >= MAX_TREE_COMP_LENGTH);
342
- const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
343
- size_t prev_ix = self->buckets_[key];
344
- /* The forest index of the rightmost node of the left subtree of the new
345
- root, updated as we traverse and reroot the tree of the hash bucket. */
346
- size_t node_left = FN(LeftChildIndex)(self, cur_ix);
347
- /* The forest index of the leftmost node of the right subtree of the new
348
- root, updated as we traverse and reroot the tree of the hash bucket. */
349
- size_t node_right = FN(RightChildIndex)(self, cur_ix);
350
- /* The match length of the rightmost node of the left subtree of the new
351
- root, updated as we traverse and reroot the tree of the hash bucket. */
352
- size_t best_len_left = 0;
353
- /* The match length of the leftmost node of the right subtree of the new
354
- root, updated as we traverse and reroot the tree of the hash bucket. */
355
- size_t best_len_right = 0;
356
- size_t depth_remaining;
357
- if (should_reroot_tree) {
358
- self->buckets_[key] = (uint32_t)cur_ix;
359
- }
360
- for (depth_remaining = MAX_TREE_SEARCH_DEPTH; ; --depth_remaining) {
361
- const size_t backward = cur_ix - prev_ix;
362
- const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
363
- if (backward == 0 || backward > max_backward || depth_remaining == 0) {
364
- if (should_reroot_tree) {
365
- self->forest_[node_left] = self->invalid_pos_;
366
- self->forest_[node_right] = self->invalid_pos_;
367
- }
368
- break;
369
- }
370
- {
371
- const size_t cur_len = BROTLI_MIN(size_t, best_len_left, best_len_right);
372
- size_t len;
373
- assert(cur_len <= MAX_TREE_COMP_LENGTH);
374
- len = cur_len +
375
- FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],
376
- &data[prev_ix_masked + cur_len],
377
- max_length - cur_len);
378
- assert(0 == memcmp(&data[cur_ix_masked], &data[prev_ix_masked], len));
379
- if (matches && len > *best_len) {
380
- *best_len = len;
381
- InitBackwardMatch(matches++, backward, len);
382
- }
383
- if (len >= max_comp_len) {
384
- if (should_reroot_tree) {
385
- self->forest_[node_left] =
386
- self->forest_[FN(LeftChildIndex)(self, prev_ix)];
387
- self->forest_[node_right] =
388
- self->forest_[FN(RightChildIndex)(self, prev_ix)];
389
- }
390
- break;
391
- }
392
- if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {
393
- best_len_left = len;
394
- if (should_reroot_tree) {
395
- self->forest_[node_left] = (uint32_t)prev_ix;
396
- }
397
- node_left = FN(RightChildIndex)(self, prev_ix);
398
- prev_ix = self->forest_[node_left];
399
- } else {
400
- best_len_right = len;
401
- if (should_reroot_tree) {
402
- self->forest_[node_right] = (uint32_t)prev_ix;
403
- }
404
- node_right = FN(LeftChildIndex)(self, prev_ix);
405
- prev_ix = self->forest_[node_right];
406
- }
407
- }
408
- }
409
- return matches;
410
- }
411
-
412
- /* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
413
- length of max_length and stores the position cur_ix in the hash table.
414
-
415
- Sets *num_matches to the number of matches found, and stores the found
416
- matches in matches[0] to matches[*num_matches - 1]. The matches will be
417
- sorted by strictly increasing length and (non-strictly) increasing
418
- distance. */
419
- static BROTLI_INLINE size_t FN(FindAllMatches)(HashToBinaryTree* self,
420
- const uint8_t* data, const size_t ring_buffer_mask, const size_t cur_ix,
421
- const size_t max_length, const size_t max_backward,
422
- const BrotliEncoderParams* params, BackwardMatch* matches) {
423
- BackwardMatch* const orig_matches = matches;
424
- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
425
- size_t best_len = 1;
426
- const size_t short_match_max_backward =
427
- params->quality != HQ_ZOPFLIFICATION_QUALITY ? 16 : 64;
428
- size_t stop = cur_ix - short_match_max_backward;
429
- uint32_t dict_matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
430
- size_t i;
431
- if (cur_ix < short_match_max_backward) { stop = 0; }
432
- for (i = cur_ix - 1; i > stop && best_len <= 2; --i) {
433
- size_t prev_ix = i;
434
- const size_t backward = cur_ix - prev_ix;
435
- if (PREDICT_FALSE(backward > max_backward)) {
436
- break;
437
- }
438
- prev_ix &= ring_buffer_mask;
439
- if (data[cur_ix_masked] != data[prev_ix] ||
440
- data[cur_ix_masked + 1] != data[prev_ix + 1]) {
441
- continue;
442
- }
443
- {
444
- const size_t len =
445
- FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
446
- max_length);
447
- if (len > best_len) {
448
- best_len = len;
449
- InitBackwardMatch(matches++, backward, len);
450
- }
451
- }
452
- }
453
- if (best_len < max_length) {
454
- matches = FN(StoreAndFindMatches)(self, data, cur_ix, ring_buffer_mask,
455
- max_length, max_backward, &best_len, matches);
456
- }
457
- for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) {
458
- dict_matches[i] = kInvalidMatch;
459
- }
460
- {
461
- size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1);
462
- if (BrotliFindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen,
463
- max_length, &dict_matches[0])) {
464
- size_t maxlen = BROTLI_MIN(
465
- size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length);
466
- size_t l;
467
- for (l = minlen; l <= maxlen; ++l) {
468
- uint32_t dict_id = dict_matches[l];
469
- if (dict_id < kInvalidMatch) {
470
- InitDictionaryBackwardMatch(matches++,
471
- max_backward + (dict_id >> 5) + 1, l, dict_id & 31);
472
- }
473
- }
474
- }
475
- }
476
- return (size_t)(matches - orig_matches);
477
- }
478
-
479
- /* Stores the hash of the next 4 bytes and re-roots the binary tree at the
480
- current sequence, without returning any matches.
481
- REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */
482
- static BROTLI_INLINE void FN(Store)(HashToBinaryTree* self, const uint8_t *data,
483
- const size_t mask, const size_t ix) {
484
- /* Maximum distance is window size - 16, see section 9.1. of the spec. */
485
- const size_t max_backward = self->window_mask_ - 15;
486
- FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH,
487
- max_backward, NULL, NULL);
488
- }
489
-
490
- static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* self,
491
- const uint8_t *data, const size_t mask, const size_t ix_start,
492
- const size_t ix_end) {
493
- size_t i = ix_start + 63 <= ix_end ? ix_end - 63 : ix_start;
494
- for (; i < ix_end; ++i) {
495
- FN(Store)(self, data, mask, i);
496
- }
497
- }
498
-
499
- static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashToBinaryTree* self,
500
- size_t num_bytes, size_t position, const uint8_t* ringbuffer,
501
- size_t ringbuffer_mask) {
502
- if (num_bytes >= FN(HashTypeLength)() - 1 &&
503
- position >= MAX_TREE_COMP_LENGTH) {
504
- /* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher.
505
- These could not be calculated before, since they require knowledge
506
- of both the previous and the current block. */
507
- const size_t i_start = position - MAX_TREE_COMP_LENGTH + 1;
508
- const size_t i_end = BROTLI_MIN(size_t, position, i_start + num_bytes);
509
- size_t i;
510
- for (i = i_start; i < i_end; ++i) {
511
- /* Maximum distance is window size - 16, see section 9.1. of the spec.
512
- Furthermore, we have to make sure that we don't look further back
513
- from the start of the next block than the window size, otherwise we
514
- could access already overwritten areas of the ringbuffer. */
515
- const size_t max_backward =
516
- self->window_mask_ - BROTLI_MAX(size_t, 15, position - i);
517
- /* We know that i + MAX_TREE_COMP_LENGTH <= position + num_bytes, i.e. the
518
- end of the current block and that we have at least
519
- MAX_TREE_COMP_LENGTH tail in the ringbuffer. */
520
- FN(StoreAndFindMatches)(self, ringbuffer, i, ringbuffer_mask,
521
- MAX_TREE_COMP_LENGTH, max_backward, NULL, NULL);
522
- }
523
- }
524
- }
525
-
526
- #undef BUCKET_SIZE
527
- #undef BUCKET_BITS
528
-
529
- #undef HASHER
530
-
531
- /* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression
532
- a little faster (0.5% - 1%) and it compresses 0.15% better on small text
533
- and html inputs. */
534
-
535
- #define HASHER() H2
536
- #define BUCKET_BITS 16
537
- #define BUCKET_SWEEP 1
538
- #define USE_DICTIONARY 1
539
- #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
540
- #undef BUCKET_SWEEP
541
- #undef USE_DICTIONARY
542
- #undef HASHER
543
-
544
- #define HASHER() H3
545
- #define BUCKET_SWEEP 2
546
- #define USE_DICTIONARY 0
547
- #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
548
- #undef USE_DICTIONARY
549
- #undef BUCKET_SWEEP
550
- #undef BUCKET_BITS
551
- #undef HASHER
552
-
553
- #define HASHER() H4
554
- #define BUCKET_BITS 17
555
- #define BUCKET_SWEEP 4
556
- #define USE_DICTIONARY 1
557
- #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
558
- #undef USE_DICTIONARY
559
- #undef BUCKET_SWEEP
560
- #undef BUCKET_BITS
561
- #undef HASHER
562
-
563
- #define HASHER() H5
564
- #define BUCKET_BITS 14
565
- #define BLOCK_BITS 4
566
- #define NUM_LAST_DISTANCES_TO_CHECK 4
567
- #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
568
- #undef BLOCK_BITS
569
- #undef HASHER
570
-
571
- #define HASHER() H6
572
- #define BLOCK_BITS 5
573
- #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
574
- #undef NUM_LAST_DISTANCES_TO_CHECK
575
- #undef BLOCK_BITS
576
- #undef BUCKET_BITS
577
- #undef HASHER
578
-
579
- #define HASHER() H7
580
- #define BUCKET_BITS 15
581
- #define BLOCK_BITS 6
582
- #define NUM_LAST_DISTANCES_TO_CHECK 10
583
- #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
584
- #undef BLOCK_BITS
585
- #undef HASHER
586
-
587
- #define HASHER() H8
588
- #define BLOCK_BITS 7
589
- #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
590
- #undef NUM_LAST_DISTANCES_TO_CHECK
591
- #undef BLOCK_BITS
592
- #undef HASHER
593
-
594
- #define HASHER() H9
595
- #define BLOCK_BITS 8
596
- #define NUM_LAST_DISTANCES_TO_CHECK 16
597
- #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
598
- #undef NUM_LAST_DISTANCES_TO_CHECK
599
- #undef BLOCK_BITS
600
- #undef BUCKET_BITS
601
- #undef HASHER
602
-
603
- #define BUCKET_BITS 15
604
-
605
- #define NUM_LAST_DISTANCES_TO_CHECK 4
606
- #define NUM_BANKS 1
607
- #define BANK_BITS 16
608
- #define HASHER() H40
609
- #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
610
- #undef HASHER
611
- #undef NUM_LAST_DISTANCES_TO_CHECK
612
-
613
- #define NUM_LAST_DISTANCES_TO_CHECK 10
614
- #define HASHER() H41
615
- #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
616
- #undef HASHER
617
- #undef NUM_LAST_DISTANCES_TO_CHECK
618
- #undef NUM_BANKS
619
- #undef BANK_BITS
620
-
621
- #define NUM_LAST_DISTANCES_TO_CHECK 16
622
- #define NUM_BANKS 512
623
- #define BANK_BITS 9
624
- #define HASHER() H42
625
- #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
626
- #undef HASHER
627
- #undef NUM_LAST_DISTANCES_TO_CHECK
628
- #undef NUM_BANKS
629
- #undef BANK_BITS
630
-
631
- #undef BUCKET_BITS
632
-
633
- #undef FN
634
- #undef CAT
635
- #undef EXPAND_CAT
636
-
637
- #define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(7) H(8) H(9) \
638
- H(40) H(41) H(42)
639
- #define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
640
-
641
- typedef struct Hashers {
642
- #define _MEMBER(N) H ## N* h ## N;
643
- FOR_ALL_HASHERS(_MEMBER)
644
- #undef _MEMBER
645
- } Hashers;
646
-
647
- static BROTLI_INLINE void InitHashers(Hashers* self) {
648
- #define _INIT(N) self->h ## N = 0;
649
- FOR_ALL_HASHERS(_INIT)
650
- #undef _INIT
651
- }
652
-
653
- static BROTLI_INLINE void DestroyHashers(MemoryManager* m, Hashers* self) {
654
- if (self->h10) CleanupH10(m, self->h10);
655
- #define _CLEANUP(N) BROTLI_FREE(m, self->h ## N)
656
- FOR_ALL_HASHERS(_CLEANUP)
657
- #undef _CLEANUP
658
- }
659
-
660
- static BROTLI_INLINE void HashersReset(Hashers* self, int type) {
661
- switch (type) {
662
- #define _RESET(N) case N: ResetH ## N(self->h ## N); break;
663
- FOR_ALL_HASHERS(_RESET)
664
- #undef _RESET
665
- default: break;
666
- }
667
- }
668
-
669
- static BROTLI_INLINE void HashersSetup(
670
- MemoryManager* m, Hashers* self, int type) {
671
- switch (type) {
672
- #define _SETUP(N) case N: self->h ## N = BROTLI_ALLOC(m, H ## N, 1); break;
673
- FOR_ALL_HASHERS(_SETUP)
674
- #undef _SETUP
675
- default: break;
676
- }
677
- if (BROTLI_IS_OOM(m)) return;
678
- if (type == 10) InitializeH10(self->h10);
679
- HashersReset(self, type);
680
- }
681
-
682
- #define _WARMUP_HASH(N) \
683
- static BROTLI_INLINE void WarmupHashH ## N(MemoryManager* m, \
684
- const BrotliEncoderParams* params, const size_t size, const uint8_t* dict, \
685
- H ## N* hasher) { \
686
- size_t overlap = (StoreLookaheadH ## N()) - 1; \
687
- size_t i; \
688
- InitH ## N(m, hasher, dict, params, 0, size, BROTLI_FALSE); \
689
- if (BROTLI_IS_OOM(m)) return; \
690
- for (i = 0; i + overlap < size; i++) { \
691
- StoreH ## N(hasher, dict, ~(size_t)0, i); \
692
- } \
693
- }
694
- FOR_ALL_HASHERS(_WARMUP_HASH)
695
- #undef _WARMUP_HASH
696
-
697
- /* Custom LZ77 window. */
698
- static BROTLI_INLINE void HashersPrependCustomDictionary(
699
- MemoryManager* m, Hashers* self, const BrotliEncoderParams* params,
700
- const size_t size, const uint8_t* dict) {
701
- int hasher_type = ChooseHasher(params);
702
- switch (hasher_type) {
703
- #define _PREPEND(N) \
704
- case N: WarmupHashH ## N(m, params, size, dict, self->h ## N); break;
705
- FOR_ALL_HASHERS(_PREPEND)
706
- #undef _PREPEND
707
- default: break;
708
- }
709
- if (BROTLI_IS_OOM(m)) return;
710
- }
711
-
712
-
713
- #if defined(__cplusplus) || defined(c_plusplus)
714
- } /* extern "C" */
715
- #endif
716
-
717
- #endif /* BROTLI_ENC_HASH_H_ */