brotli 0.1.8 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.travis.yml +7 -3
  4. data/brotli.gemspec +1 -1
  5. data/ext/brotli/brotli.c +4 -4
  6. data/ext/brotli/brotli.h +2 -2
  7. data/ext/brotli/extconf.rb +9 -16
  8. data/lib/brotli/version.rb +1 -1
  9. data/vendor/brotli/{common → c/common}/constants.h +11 -1
  10. data/vendor/brotli/c/common/dictionary.bin +432 -0
  11. data/vendor/brotli/c/common/dictionary.c +5905 -0
  12. data/vendor/brotli/c/common/dictionary.h +64 -0
  13. data/vendor/brotli/c/common/version.h +19 -0
  14. data/vendor/brotli/{dec → c/dec}/bit_reader.c +2 -2
  15. data/vendor/brotli/{dec → c/dec}/bit_reader.h +11 -34
  16. data/vendor/brotli/{dec → c/dec}/context.h +1 -1
  17. data/vendor/brotli/{dec → c/dec}/decode.c +389 -356
  18. data/vendor/brotli/{dec → c/dec}/huffman.c +24 -23
  19. data/vendor/brotli/{dec → c/dec}/huffman.h +1 -1
  20. data/vendor/brotli/{dec → c/dec}/port.h +19 -10
  21. data/vendor/brotli/{dec → c/dec}/prefix.h +1 -1
  22. data/vendor/brotli/{dec → c/dec}/state.c +23 -19
  23. data/vendor/brotli/{dec → c/dec}/state.h +18 -17
  24. data/vendor/brotli/{dec → c/dec}/transform.h +2 -2
  25. data/vendor/brotli/c/enc/backward_references.c +134 -0
  26. data/vendor/brotli/c/enc/backward_references.h +39 -0
  27. data/vendor/brotli/{enc/backward_references.c → c/enc/backward_references_hq.c} +144 -232
  28. data/vendor/brotli/{enc/backward_references.h → c/enc/backward_references_hq.h} +28 -31
  29. data/vendor/brotli/{enc → c/enc}/backward_references_inc.h +37 -31
  30. data/vendor/brotli/{enc → c/enc}/bit_cost.c +1 -1
  31. data/vendor/brotli/{enc → c/enc}/bit_cost.h +1 -1
  32. data/vendor/brotli/{enc → c/enc}/bit_cost_inc.h +0 -0
  33. data/vendor/brotli/{enc → c/enc}/block_encoder_inc.h +0 -0
  34. data/vendor/brotli/{enc → c/enc}/block_splitter.c +2 -4
  35. data/vendor/brotli/{enc → c/enc}/block_splitter.h +1 -1
  36. data/vendor/brotli/{enc → c/enc}/block_splitter_inc.h +6 -7
  37. data/vendor/brotli/{enc → c/enc}/brotli_bit_stream.c +22 -26
  38. data/vendor/brotli/{enc → c/enc}/brotli_bit_stream.h +1 -5
  39. data/vendor/brotli/{enc → c/enc}/cluster.c +1 -1
  40. data/vendor/brotli/{enc → c/enc}/cluster.h +1 -1
  41. data/vendor/brotli/{enc → c/enc}/cluster_inc.h +2 -0
  42. data/vendor/brotli/{enc → c/enc}/command.h +34 -17
  43. data/vendor/brotli/{enc → c/enc}/compress_fragment.c +97 -53
  44. data/vendor/brotli/{enc → c/enc}/compress_fragment.h +5 -2
  45. data/vendor/brotli/{enc → c/enc}/compress_fragment_two_pass.c +106 -51
  46. data/vendor/brotli/{enc → c/enc}/compress_fragment_two_pass.h +5 -2
  47. data/vendor/brotli/{enc → c/enc}/context.h +3 -3
  48. data/vendor/brotli/c/enc/dictionary_hash.c +1120 -0
  49. data/vendor/brotli/c/enc/dictionary_hash.h +24 -0
  50. data/vendor/brotli/{enc → c/enc}/encode.c +442 -240
  51. data/vendor/brotli/{enc → c/enc}/entropy_encode.c +9 -9
  52. data/vendor/brotli/{enc → c/enc}/entropy_encode.h +4 -4
  53. data/vendor/brotli/{enc → c/enc}/entropy_encode_static.h +4 -4
  54. data/vendor/brotli/{enc → c/enc}/fast_log.h +3 -3
  55. data/vendor/brotli/{enc → c/enc}/find_match_length.h +8 -8
  56. data/vendor/brotli/c/enc/hash.h +446 -0
  57. data/vendor/brotli/{enc → c/enc}/hash_forgetful_chain_inc.h +72 -68
  58. data/vendor/brotli/c/enc/hash_longest_match64_inc.h +266 -0
  59. data/vendor/brotli/c/enc/hash_longest_match_inc.h +258 -0
  60. data/vendor/brotli/{enc → c/enc}/hash_longest_match_quickly_inc.h +81 -77
  61. data/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +326 -0
  62. data/vendor/brotli/{enc → c/enc}/histogram.c +4 -2
  63. data/vendor/brotli/{enc → c/enc}/histogram.h +1 -1
  64. data/vendor/brotli/{enc → c/enc}/histogram_inc.h +0 -0
  65. data/vendor/brotli/{enc → c/enc}/literal_cost.c +4 -7
  66. data/vendor/brotli/{enc → c/enc}/literal_cost.h +2 -2
  67. data/vendor/brotli/{enc → c/enc}/memory.c +1 -1
  68. data/vendor/brotli/{enc → c/enc}/memory.h +3 -2
  69. data/vendor/brotli/{enc → c/enc}/metablock.c +136 -123
  70. data/vendor/brotli/{enc → c/enc}/metablock.h +2 -12
  71. data/vendor/brotli/{enc → c/enc}/metablock_inc.h +0 -0
  72. data/vendor/brotli/{enc → c/enc}/port.h +49 -33
  73. data/vendor/brotli/{enc → c/enc}/prefix.h +4 -2
  74. data/vendor/brotli/{enc → c/enc}/quality.h +47 -17
  75. data/vendor/brotli/{enc → c/enc}/ringbuffer.h +6 -6
  76. data/vendor/brotli/{enc → c/enc}/static_dict.c +26 -22
  77. data/vendor/brotli/{enc → c/enc}/static_dict.h +3 -1
  78. data/vendor/brotli/c/enc/static_dict_lut.h +5864 -0
  79. data/vendor/brotli/{enc → c/enc}/utf8_util.c +1 -1
  80. data/vendor/brotli/{enc → c/enc}/utf8_util.h +2 -2
  81. data/vendor/brotli/{enc → c/enc}/write_bits.h +3 -3
  82. data/vendor/brotli/c/include/brotli/decode.h +339 -0
  83. data/vendor/brotli/c/include/brotli/encode.h +402 -0
  84. data/vendor/brotli/c/include/brotli/port.h +146 -0
  85. data/vendor/brotli/c/include/brotli/types.h +90 -0
  86. metadata +80 -79
  87. data/vendor/brotli/common/dictionary.c +0 -9474
  88. data/vendor/brotli/common/dictionary.h +0 -29
  89. data/vendor/brotli/common/port.h +0 -107
  90. data/vendor/brotli/common/types.h +0 -58
  91. data/vendor/brotli/dec/decode.h +0 -188
  92. data/vendor/brotli/enc/compressor.cc +0 -139
  93. data/vendor/brotli/enc/compressor.h +0 -161
  94. data/vendor/brotli/enc/dictionary_hash.h +0 -4121
  95. data/vendor/brotli/enc/encode.h +0 -221
  96. data/vendor/brotli/enc/encode_parallel.cc +0 -289
  97. data/vendor/brotli/enc/encode_parallel.h +0 -27
  98. data/vendor/brotli/enc/hash.h +0 -717
  99. data/vendor/brotli/enc/hash_longest_match_inc.h +0 -241
  100. data/vendor/brotli/enc/static_dict_lut.h +0 -11241
  101. data/vendor/brotli/enc/streams.cc +0 -114
  102. data/vendor/brotli/enc/streams.h +0 -121
@@ -1,27 +0,0 @@
1
- /* Copyright 2013 Google Inc. All Rights Reserved.
2
-
3
- Distributed under MIT license.
4
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
- */
6
-
7
- /* API for parallel Brotli compression
8
- Note that this is only a proof of concept currently and not part of the
9
- final API yet. */
10
-
11
- #ifndef BROTLI_ENC_ENCODE_PARALLEL_H_
12
- #define BROTLI_ENC_ENCODE_PARALLEL_H_
13
-
14
- #include "../common/types.h"
15
- #include "./compressor.h"
16
-
17
- namespace brotli {
18
-
19
- int BrotliCompressBufferParallel(BrotliParams params,
20
- size_t input_size,
21
- const uint8_t* input_buffer,
22
- size_t* encoded_size,
23
- uint8_t* encoded_buffer);
24
-
25
- } /* namespace brotli */
26
-
27
- #endif /* BROTLI_ENC_ENCODE_PARALLEL_H_ */
@@ -1,717 +0,0 @@
1
- /* Copyright 2010 Google Inc. All Rights Reserved.
2
-
3
- Distributed under MIT license.
4
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
- */
6
-
7
- /* A (forgetful) hash table to the data seen by the compressor, to
8
- help create backward references to previous data. */
9
-
10
- #ifndef BROTLI_ENC_HASH_H_
11
- #define BROTLI_ENC_HASH_H_
12
-
13
- #include <string.h> /* memcmp, memset */
14
-
15
- #include "../common/constants.h"
16
- #include "../common/dictionary.h"
17
- #include "../common/types.h"
18
- #include "./dictionary_hash.h"
19
- #include "./fast_log.h"
20
- #include "./find_match_length.h"
21
- #include "./memory.h"
22
- #include "./port.h"
23
- #include "./quality.h"
24
- #include "./static_dict.h"
25
-
26
- #if defined(__cplusplus) || defined(c_plusplus)
27
- extern "C" {
28
- #endif
29
-
30
- #define MAX_TREE_SEARCH_DEPTH 64
31
- #define MAX_TREE_COMP_LENGTH 128
32
- #define score_t size_t
33
-
34
- static const uint32_t kDistanceCacheIndex[] = {
35
- 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
36
- };
37
- static const int kDistanceCacheOffset[] = {
38
- 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
39
- };
40
-
41
- static const uint32_t kCutoffTransformsCount = 10;
42
- static const uint8_t kCutoffTransforms[] = {
43
- 0, 12, 27, 23, 42, 63, 56, 48, 59, 64
44
- };
45
-
46
- typedef struct HasherSearchResult {
47
- size_t len;
48
- size_t len_x_code; /* == len ^ len_code */
49
- size_t distance;
50
- score_t score;
51
- } HasherSearchResult;
52
-
53
- typedef struct DictionarySearchStatictics {
54
- size_t num_lookups;
55
- size_t num_matches;
56
- } DictionarySearchStatictics;
57
-
58
- /* kHashMul32 multiplier has these properties:
59
- * The multiplier must be odd. Otherwise we may lose the highest bit.
60
- * No long streaks of 1s or 0s.
61
- * There is no effort to ensure that it is a prime, the oddity is enough
62
- for this use.
63
- * The number has been tuned heuristically against compression benchmarks. */
64
- static const uint32_t kHashMul32 = 0x1e35a7bd;
65
-
66
- static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
67
- uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
68
- /* The higher bits contain more mixture from the multiplication,
69
- so we take our results from there. */
70
- return h >> (32 - 14);
71
- }
72
-
73
- #define BROTLI_LITERAL_BYTE_SCORE 540
74
- #define BROTLI_DISTANCE_BIT_PENALTY 120
75
- /* Score must be positive after applying maximal penalty. */
76
- #define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))
77
-
78
- /* Usually, we always choose the longest backward reference. This function
79
- allows for the exception of that rule.
80
-
81
- If we choose a backward reference that is further away, it will
82
- usually be coded with more bits. We approximate this by assuming
83
- log2(distance). If the distance can be expressed in terms of the
84
- last four distances, we use some heuristic constants to estimate
85
- the bits cost. For the first up to four literals we use the bit
86
- cost of the literals from the literal cost model, after that we
87
- use the average bit cost of the cost model.
88
-
89
- This function is used to sometimes discard a longer backward reference
90
- when it is not much longer and the bit cost for encoding it is more
91
- than the saved literals.
92
-
93
- backward_reference_offset MUST be positive. */
94
- static BROTLI_INLINE score_t BackwardReferenceScore(
95
- size_t copy_length, size_t backward_reference_offset) {
96
- return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length -
97
- BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);
98
- }
99
-
100
- static const score_t kDistanceShortCodeCost[BROTLI_NUM_DISTANCE_SHORT_CODES] = {
101
- /* Repeat last */
102
- BROTLI_SCORE_BASE + 60,
103
- /* 2nd, 3rd, 4th last */
104
- BROTLI_SCORE_BASE - 95,
105
- BROTLI_SCORE_BASE - 117,
106
- BROTLI_SCORE_BASE - 127,
107
- /* Last with offset */
108
- BROTLI_SCORE_BASE - 93,
109
- BROTLI_SCORE_BASE - 93,
110
- BROTLI_SCORE_BASE - 96,
111
- BROTLI_SCORE_BASE - 96,
112
- BROTLI_SCORE_BASE - 99,
113
- BROTLI_SCORE_BASE - 99,
114
- /* 2nd last with offset */
115
- BROTLI_SCORE_BASE - 105,
116
- BROTLI_SCORE_BASE - 105,
117
- BROTLI_SCORE_BASE - 115,
118
- BROTLI_SCORE_BASE - 115,
119
- BROTLI_SCORE_BASE - 125,
120
- BROTLI_SCORE_BASE - 125
121
- };
122
-
123
- static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(
124
- size_t copy_length, size_t distance_short_code) {
125
- return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length +
126
- kDistanceShortCodeCost[distance_short_code];
127
- }
128
-
129
- static BROTLI_INLINE void DictionarySearchStaticticsReset(
130
- DictionarySearchStatictics* self) {
131
- self->num_lookups = 0;
132
- self->num_matches = 0;
133
- }
134
-
135
- static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
136
- size_t item, const uint8_t* data, size_t max_length, size_t max_backward,
137
- HasherSearchResult* out) {
138
- size_t len;
139
- size_t dist;
140
- size_t offset;
141
- size_t matchlen;
142
- size_t backward;
143
- score_t score;
144
- len = item & 31;
145
- dist = item >> 5;
146
- offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
147
- if (len > max_length) {
148
- return BROTLI_FALSE;
149
- }
150
-
151
- matchlen = FindMatchLengthWithLimit(data, &kBrotliDictionary[offset], len);
152
- if (matchlen + kCutoffTransformsCount <= len || matchlen == 0) {
153
- return BROTLI_FALSE;
154
- }
155
- {
156
- size_t transform_id = kCutoffTransforms[len - matchlen];
157
- backward = max_backward + dist + 1 +
158
- (transform_id << kBrotliDictionarySizeBitsByLength[len]);
159
- }
160
- score = BackwardReferenceScore(matchlen, backward);
161
- if (score < out->score) {
162
- return BROTLI_FALSE;
163
- }
164
- out->len = matchlen;
165
- out->len_x_code = len ^ matchlen;
166
- out->distance = backward;
167
- out->score = score;
168
- return BROTLI_TRUE;
169
- }
170
-
171
- static BROTLI_INLINE BROTLI_BOOL SearchInStaticDictionary(
172
- DictionarySearchStatictics* self, const uint8_t* data, size_t max_length,
173
- size_t max_backward, HasherSearchResult* out, BROTLI_BOOL shallow) {
174
- size_t key;
175
- size_t i;
176
- BROTLI_BOOL is_match_found = BROTLI_FALSE;
177
- if (self->num_matches < (self->num_lookups >> 7)) {
178
- return BROTLI_FALSE;
179
- }
180
- key = Hash14(data) << 1;
181
- for (i = 0; i < (shallow ? 1 : 2); ++i, ++key) {
182
- size_t item = kStaticDictionaryHash[key];
183
- self->num_lookups++;
184
- if (item != 0 &&
185
- TestStaticDictionaryItem(item, data, max_length, max_backward, out)) {
186
- self->num_matches++;
187
- is_match_found = BROTLI_TRUE;
188
- }
189
- }
190
- return is_match_found;
191
- }
192
-
193
- typedef struct BackwardMatch {
194
- uint32_t distance;
195
- uint32_t length_and_code;
196
- } BackwardMatch;
197
-
198
- static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,
199
- size_t dist, size_t len) {
200
- self->distance = (uint32_t)dist;
201
- self->length_and_code = (uint32_t)(len << 5);
202
- }
203
-
204
- static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,
205
- size_t dist, size_t len, size_t len_code) {
206
- self->distance = (uint32_t)dist;
207
- self->length_and_code =
208
- (uint32_t)((len << 5) | (len == len_code ? 0 : len_code));
209
- }
210
-
211
- static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {
212
- return self->length_and_code >> 5;
213
- }
214
-
215
- static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
216
- size_t code = self->length_and_code & 31;
217
- return code ? code : BackwardMatchLength(self);
218
- }
219
-
220
- #define EXPAND_CAT(a, b) CAT(a, b)
221
- #define CAT(a, b) a ## b
222
- #define FN(X) EXPAND_CAT(X, HASHER())
223
-
224
- #define MAX_NUM_MATCHES_H10 (64 + MAX_TREE_SEARCH_DEPTH)
225
-
226
- #define HASHER() H10
227
- #define HashToBinaryTree HASHER()
228
-
229
- #define BUCKET_BITS 17
230
- #define BUCKET_SIZE (1 << BUCKET_BITS)
231
-
232
- static size_t FN(HashTypeLength)(void) { return 4; }
233
- static size_t FN(StoreLookahead)(void) { return MAX_TREE_COMP_LENGTH; }
234
-
235
- static uint32_t FN(HashBytes)(const uint8_t *data) {
236
- uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
237
- /* The higher bits contain more mixture from the multiplication,
238
- so we take our results from there. */
239
- return h >> (32 - BUCKET_BITS);
240
- }
241
-
242
- /* A (forgetful) hash table where each hash bucket contains a binary tree of
243
- sequences whose first 4 bytes share the same hash code.
244
- Each sequence is MAX_TREE_COMP_LENGTH long and is identified by its starting
245
- position in the input data. The binary tree is sorted by the lexicographic
246
- order of the sequences, and it is also a max-heap with respect to the
247
- starting positions. */
248
- typedef struct HashToBinaryTree {
249
- /* The window size minus 1 */
250
- size_t window_mask_;
251
-
252
- /* Hash table that maps the 4-byte hashes of the sequence to the last
253
- position where this hash was found, which is the root of the binary
254
- tree of sequences that share this hash bucket. */
255
- uint32_t buckets_[BUCKET_SIZE];
256
-
257
- /* The union of the binary trees of each hash bucket. The root of the tree
258
- corresponding to a hash is a sequence starting at buckets_[hash] and
259
- the left and right children of a sequence starting at pos are
260
- forest_[2 * pos] and forest_[2 * pos + 1]. */
261
- uint32_t* forest_;
262
-
263
- /* A position used to mark a non-existent sequence, i.e. a tree is empty if
264
- its root is at invalid_pos_ and a node is a leaf if both its children
265
- are at invalid_pos_. */
266
- uint32_t invalid_pos_;
267
-
268
- size_t forest_size_;
269
- BROTLI_BOOL is_dirty_;
270
- } HashToBinaryTree;
271
-
272
- static void FN(Reset)(HashToBinaryTree* self) {
273
- self->is_dirty_ = BROTLI_TRUE;
274
- }
275
-
276
- static void FN(Initialize)(HashToBinaryTree* self) {
277
- self->forest_ = NULL;
278
- self->forest_size_ = 0;
279
- FN(Reset)(self);
280
- }
281
-
282
- static void FN(Cleanup)(MemoryManager* m, HashToBinaryTree* self) {
283
- BROTLI_FREE(m, self->forest_);
284
- }
285
-
286
- static void FN(Init)(
287
- MemoryManager* m, HashToBinaryTree* self, const uint8_t* data,
288
- const BrotliEncoderParams* params, size_t position, size_t bytes,
289
- BROTLI_BOOL is_last) {
290
- if (self->is_dirty_) {
291
- uint32_t invalid_pos;
292
- size_t num_nodes;
293
- uint32_t i;
294
- BROTLI_UNUSED(data);
295
- self->window_mask_ = (1u << params->lgwin) - 1u;
296
- invalid_pos = (uint32_t)(0 - self->window_mask_);
297
- self->invalid_pos_ = invalid_pos;
298
- for (i = 0; i < BUCKET_SIZE; i++) {
299
- self->buckets_[i] = invalid_pos;
300
- }
301
- num_nodes = (position == 0 && is_last) ? bytes : self->window_mask_ + 1;
302
- if (num_nodes > self->forest_size_) {
303
- BROTLI_FREE(m, self->forest_);
304
- self->forest_ = BROTLI_ALLOC(m, uint32_t, 2 * num_nodes);
305
- if (BROTLI_IS_OOM(m)) return;
306
- self->forest_size_ = num_nodes;
307
- }
308
- self->is_dirty_ = BROTLI_FALSE;
309
- }
310
- }
311
-
312
- static BROTLI_INLINE size_t FN(LeftChildIndex)(HashToBinaryTree* self,
313
- const size_t pos) {
314
- return 2 * (pos & self->window_mask_);
315
- }
316
-
317
- static BROTLI_INLINE size_t FN(RightChildIndex)(HashToBinaryTree* self,
318
- const size_t pos) {
319
- return 2 * (pos & self->window_mask_) + 1;
320
- }
321
-
322
- /* Stores the hash of the next 4 bytes and in a single tree-traversal, the
323
- hash bucket's binary tree is searched for matches and is re-rooted at the
324
- current position.
325
-
326
- If less than MAX_TREE_COMP_LENGTH data is available, the hash bucket of the
327
- current position is searched for matches, but the state of the hash table
328
- is not changed, since we can not know the final sorting order of the
329
- current (incomplete) sequence.
330
-
331
- This function must be called with increasing cur_ix positions. */
332
- static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
333
- HashToBinaryTree* self, const uint8_t* const BROTLI_RESTRICT data,
334
- const size_t cur_ix, const size_t ring_buffer_mask, const size_t max_length,
335
- const size_t max_backward, size_t* const BROTLI_RESTRICT best_len,
336
- BackwardMatch* BROTLI_RESTRICT matches) {
337
- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
338
- const size_t max_comp_len =
339
- BROTLI_MIN(size_t, max_length, MAX_TREE_COMP_LENGTH);
340
- const BROTLI_BOOL should_reroot_tree =
341
- TO_BROTLI_BOOL(max_length >= MAX_TREE_COMP_LENGTH);
342
- const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
343
- size_t prev_ix = self->buckets_[key];
344
- /* The forest index of the rightmost node of the left subtree of the new
345
- root, updated as we traverse and reroot the tree of the hash bucket. */
346
- size_t node_left = FN(LeftChildIndex)(self, cur_ix);
347
- /* The forest index of the leftmost node of the right subtree of the new
348
- root, updated as we traverse and reroot the tree of the hash bucket. */
349
- size_t node_right = FN(RightChildIndex)(self, cur_ix);
350
- /* The match length of the rightmost node of the left subtree of the new
351
- root, updated as we traverse and reroot the tree of the hash bucket. */
352
- size_t best_len_left = 0;
353
- /* The match length of the leftmost node of the right subtree of the new
354
- root, updated as we traverse and reroot the tree of the hash bucket. */
355
- size_t best_len_right = 0;
356
- size_t depth_remaining;
357
- if (should_reroot_tree) {
358
- self->buckets_[key] = (uint32_t)cur_ix;
359
- }
360
- for (depth_remaining = MAX_TREE_SEARCH_DEPTH; ; --depth_remaining) {
361
- const size_t backward = cur_ix - prev_ix;
362
- const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
363
- if (backward == 0 || backward > max_backward || depth_remaining == 0) {
364
- if (should_reroot_tree) {
365
- self->forest_[node_left] = self->invalid_pos_;
366
- self->forest_[node_right] = self->invalid_pos_;
367
- }
368
- break;
369
- }
370
- {
371
- const size_t cur_len = BROTLI_MIN(size_t, best_len_left, best_len_right);
372
- size_t len;
373
- assert(cur_len <= MAX_TREE_COMP_LENGTH);
374
- len = cur_len +
375
- FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],
376
- &data[prev_ix_masked + cur_len],
377
- max_length - cur_len);
378
- assert(0 == memcmp(&data[cur_ix_masked], &data[prev_ix_masked], len));
379
- if (matches && len > *best_len) {
380
- *best_len = len;
381
- InitBackwardMatch(matches++, backward, len);
382
- }
383
- if (len >= max_comp_len) {
384
- if (should_reroot_tree) {
385
- self->forest_[node_left] =
386
- self->forest_[FN(LeftChildIndex)(self, prev_ix)];
387
- self->forest_[node_right] =
388
- self->forest_[FN(RightChildIndex)(self, prev_ix)];
389
- }
390
- break;
391
- }
392
- if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {
393
- best_len_left = len;
394
- if (should_reroot_tree) {
395
- self->forest_[node_left] = (uint32_t)prev_ix;
396
- }
397
- node_left = FN(RightChildIndex)(self, prev_ix);
398
- prev_ix = self->forest_[node_left];
399
- } else {
400
- best_len_right = len;
401
- if (should_reroot_tree) {
402
- self->forest_[node_right] = (uint32_t)prev_ix;
403
- }
404
- node_right = FN(LeftChildIndex)(self, prev_ix);
405
- prev_ix = self->forest_[node_right];
406
- }
407
- }
408
- }
409
- return matches;
410
- }
411
-
412
- /* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
413
- length of max_length and stores the position cur_ix in the hash table.
414
-
415
- Sets *num_matches to the number of matches found, and stores the found
416
- matches in matches[0] to matches[*num_matches - 1]. The matches will be
417
- sorted by strictly increasing length and (non-strictly) increasing
418
- distance. */
419
- static BROTLI_INLINE size_t FN(FindAllMatches)(HashToBinaryTree* self,
420
- const uint8_t* data, const size_t ring_buffer_mask, const size_t cur_ix,
421
- const size_t max_length, const size_t max_backward,
422
- const BrotliEncoderParams* params, BackwardMatch* matches) {
423
- BackwardMatch* const orig_matches = matches;
424
- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
425
- size_t best_len = 1;
426
- const size_t short_match_max_backward =
427
- params->quality != HQ_ZOPFLIFICATION_QUALITY ? 16 : 64;
428
- size_t stop = cur_ix - short_match_max_backward;
429
- uint32_t dict_matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
430
- size_t i;
431
- if (cur_ix < short_match_max_backward) { stop = 0; }
432
- for (i = cur_ix - 1; i > stop && best_len <= 2; --i) {
433
- size_t prev_ix = i;
434
- const size_t backward = cur_ix - prev_ix;
435
- if (PREDICT_FALSE(backward > max_backward)) {
436
- break;
437
- }
438
- prev_ix &= ring_buffer_mask;
439
- if (data[cur_ix_masked] != data[prev_ix] ||
440
- data[cur_ix_masked + 1] != data[prev_ix + 1]) {
441
- continue;
442
- }
443
- {
444
- const size_t len =
445
- FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
446
- max_length);
447
- if (len > best_len) {
448
- best_len = len;
449
- InitBackwardMatch(matches++, backward, len);
450
- }
451
- }
452
- }
453
- if (best_len < max_length) {
454
- matches = FN(StoreAndFindMatches)(self, data, cur_ix, ring_buffer_mask,
455
- max_length, max_backward, &best_len, matches);
456
- }
457
- for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) {
458
- dict_matches[i] = kInvalidMatch;
459
- }
460
- {
461
- size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1);
462
- if (BrotliFindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen,
463
- max_length, &dict_matches[0])) {
464
- size_t maxlen = BROTLI_MIN(
465
- size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length);
466
- size_t l;
467
- for (l = minlen; l <= maxlen; ++l) {
468
- uint32_t dict_id = dict_matches[l];
469
- if (dict_id < kInvalidMatch) {
470
- InitDictionaryBackwardMatch(matches++,
471
- max_backward + (dict_id >> 5) + 1, l, dict_id & 31);
472
- }
473
- }
474
- }
475
- }
476
- return (size_t)(matches - orig_matches);
477
- }
478
-
479
- /* Stores the hash of the next 4 bytes and re-roots the binary tree at the
480
- current sequence, without returning any matches.
481
- REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */
482
- static BROTLI_INLINE void FN(Store)(HashToBinaryTree* self, const uint8_t *data,
483
- const size_t mask, const size_t ix) {
484
- /* Maximum distance is window size - 16, see section 9.1. of the spec. */
485
- const size_t max_backward = self->window_mask_ - 15;
486
- FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH,
487
- max_backward, NULL, NULL);
488
- }
489
-
490
- static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* self,
491
- const uint8_t *data, const size_t mask, const size_t ix_start,
492
- const size_t ix_end) {
493
- size_t i = ix_start + 63 <= ix_end ? ix_end - 63 : ix_start;
494
- for (; i < ix_end; ++i) {
495
- FN(Store)(self, data, mask, i);
496
- }
497
- }
498
-
499
- static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashToBinaryTree* self,
500
- size_t num_bytes, size_t position, const uint8_t* ringbuffer,
501
- size_t ringbuffer_mask) {
502
- if (num_bytes >= FN(HashTypeLength)() - 1 &&
503
- position >= MAX_TREE_COMP_LENGTH) {
504
- /* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher.
505
- These could not be calculated before, since they require knowledge
506
- of both the previous and the current block. */
507
- const size_t i_start = position - MAX_TREE_COMP_LENGTH + 1;
508
- const size_t i_end = BROTLI_MIN(size_t, position, i_start + num_bytes);
509
- size_t i;
510
- for (i = i_start; i < i_end; ++i) {
511
- /* Maximum distance is window size - 16, see section 9.1. of the spec.
512
- Furthermore, we have to make sure that we don't look further back
513
- from the start of the next block than the window size, otherwise we
514
- could access already overwritten areas of the ringbuffer. */
515
- const size_t max_backward =
516
- self->window_mask_ - BROTLI_MAX(size_t, 15, position - i);
517
- /* We know that i + MAX_TREE_COMP_LENGTH <= position + num_bytes, i.e. the
518
- end of the current block and that we have at least
519
- MAX_TREE_COMP_LENGTH tail in the ringbuffer. */
520
- FN(StoreAndFindMatches)(self, ringbuffer, i, ringbuffer_mask,
521
- MAX_TREE_COMP_LENGTH, max_backward, NULL, NULL);
522
- }
523
- }
524
- }
525
-
526
- #undef BUCKET_SIZE
527
- #undef BUCKET_BITS
528
-
529
- #undef HASHER
530
-
531
- /* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression
532
- a little faster (0.5% - 1%) and it compresses 0.15% better on small text
533
- and html inputs. */
534
-
535
- #define HASHER() H2
536
- #define BUCKET_BITS 16
537
- #define BUCKET_SWEEP 1
538
- #define USE_DICTIONARY 1
539
- #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
540
- #undef BUCKET_SWEEP
541
- #undef USE_DICTIONARY
542
- #undef HASHER
543
-
544
- #define HASHER() H3
545
- #define BUCKET_SWEEP 2
546
- #define USE_DICTIONARY 0
547
- #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
548
- #undef USE_DICTIONARY
549
- #undef BUCKET_SWEEP
550
- #undef BUCKET_BITS
551
- #undef HASHER
552
-
553
- #define HASHER() H4
554
- #define BUCKET_BITS 17
555
- #define BUCKET_SWEEP 4
556
- #define USE_DICTIONARY 1
557
- #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
558
- #undef USE_DICTIONARY
559
- #undef BUCKET_SWEEP
560
- #undef BUCKET_BITS
561
- #undef HASHER
562
-
563
- #define HASHER() H5
564
- #define BUCKET_BITS 14
565
- #define BLOCK_BITS 4
566
- #define NUM_LAST_DISTANCES_TO_CHECK 4
567
- #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
568
- #undef BLOCK_BITS
569
- #undef HASHER
570
-
571
- #define HASHER() H6
572
- #define BLOCK_BITS 5
573
- #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
574
- #undef NUM_LAST_DISTANCES_TO_CHECK
575
- #undef BLOCK_BITS
576
- #undef BUCKET_BITS
577
- #undef HASHER
578
-
579
- #define HASHER() H7
580
- #define BUCKET_BITS 15
581
- #define BLOCK_BITS 6
582
- #define NUM_LAST_DISTANCES_TO_CHECK 10
583
- #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
584
- #undef BLOCK_BITS
585
- #undef HASHER
586
-
587
- #define HASHER() H8
588
- #define BLOCK_BITS 7
589
- #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
590
- #undef NUM_LAST_DISTANCES_TO_CHECK
591
- #undef BLOCK_BITS
592
- #undef HASHER
593
-
594
- #define HASHER() H9
595
- #define BLOCK_BITS 8
596
- #define NUM_LAST_DISTANCES_TO_CHECK 16
597
- #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
598
- #undef NUM_LAST_DISTANCES_TO_CHECK
599
- #undef BLOCK_BITS
600
- #undef BUCKET_BITS
601
- #undef HASHER
602
-
603
- #define BUCKET_BITS 15
604
-
605
- #define NUM_LAST_DISTANCES_TO_CHECK 4
606
- #define NUM_BANKS 1
607
- #define BANK_BITS 16
608
- #define HASHER() H40
609
- #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
610
- #undef HASHER
611
- #undef NUM_LAST_DISTANCES_TO_CHECK
612
-
613
- #define NUM_LAST_DISTANCES_TO_CHECK 10
614
- #define HASHER() H41
615
- #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
616
- #undef HASHER
617
- #undef NUM_LAST_DISTANCES_TO_CHECK
618
- #undef NUM_BANKS
619
- #undef BANK_BITS
620
-
621
- #define NUM_LAST_DISTANCES_TO_CHECK 16
622
- #define NUM_BANKS 512
623
- #define BANK_BITS 9
624
- #define HASHER() H42
625
- #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
626
- #undef HASHER
627
- #undef NUM_LAST_DISTANCES_TO_CHECK
628
- #undef NUM_BANKS
629
- #undef BANK_BITS
630
-
631
- #undef BUCKET_BITS
632
-
633
- #undef FN
634
- #undef CAT
635
- #undef EXPAND_CAT
636
-
637
- #define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(7) H(8) H(9) \
638
- H(40) H(41) H(42)
639
- #define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
640
-
641
- typedef struct Hashers {
642
- #define _MEMBER(N) H ## N* h ## N;
643
- FOR_ALL_HASHERS(_MEMBER)
644
- #undef _MEMBER
645
- } Hashers;
646
-
647
- static BROTLI_INLINE void InitHashers(Hashers* self) {
648
- #define _INIT(N) self->h ## N = 0;
649
- FOR_ALL_HASHERS(_INIT)
650
- #undef _INIT
651
- }
652
-
653
- static BROTLI_INLINE void DestroyHashers(MemoryManager* m, Hashers* self) {
654
- if (self->h10) CleanupH10(m, self->h10);
655
- #define _CLEANUP(N) BROTLI_FREE(m, self->h ## N)
656
- FOR_ALL_HASHERS(_CLEANUP)
657
- #undef _CLEANUP
658
- }
659
-
660
- static BROTLI_INLINE void HashersReset(Hashers* self, int type) {
661
- switch (type) {
662
- #define _RESET(N) case N: ResetH ## N(self->h ## N); break;
663
- FOR_ALL_HASHERS(_RESET)
664
- #undef _RESET
665
- default: break;
666
- }
667
- }
668
-
669
- static BROTLI_INLINE void HashersSetup(
670
- MemoryManager* m, Hashers* self, int type) {
671
- switch (type) {
672
- #define _SETUP(N) case N: self->h ## N = BROTLI_ALLOC(m, H ## N, 1); break;
673
- FOR_ALL_HASHERS(_SETUP)
674
- #undef _SETUP
675
- default: break;
676
- }
677
- if (BROTLI_IS_OOM(m)) return;
678
- if (type == 10) InitializeH10(self->h10);
679
- HashersReset(self, type);
680
- }
681
-
682
- #define _WARMUP_HASH(N) \
683
- static BROTLI_INLINE void WarmupHashH ## N(MemoryManager* m, \
684
- const BrotliEncoderParams* params, const size_t size, const uint8_t* dict, \
685
- H ## N* hasher) { \
686
- size_t overlap = (StoreLookaheadH ## N()) - 1; \
687
- size_t i; \
688
- InitH ## N(m, hasher, dict, params, 0, size, BROTLI_FALSE); \
689
- if (BROTLI_IS_OOM(m)) return; \
690
- for (i = 0; i + overlap < size; i++) { \
691
- StoreH ## N(hasher, dict, ~(size_t)0, i); \
692
- } \
693
- }
694
- FOR_ALL_HASHERS(_WARMUP_HASH)
695
- #undef _WARMUP_HASH
696
-
697
- /* Custom LZ77 window. */
698
- static BROTLI_INLINE void HashersPrependCustomDictionary(
699
- MemoryManager* m, Hashers* self, const BrotliEncoderParams* params,
700
- const size_t size, const uint8_t* dict) {
701
- int hasher_type = ChooseHasher(params);
702
- switch (hasher_type) {
703
- #define _PREPEND(N) \
704
- case N: WarmupHashH ## N(m, params, size, dict, self->h ## N); break;
705
- FOR_ALL_HASHERS(_PREPEND)
706
- #undef _PREPEND
707
- default: break;
708
- }
709
- if (BROTLI_IS_OOM(m)) return;
710
- }
711
-
712
-
713
- #if defined(__cplusplus) || defined(c_plusplus)
714
- } /* extern "C" */
715
- #endif
716
-
717
- #endif /* BROTLI_ENC_HASH_H_ */