brotli 0.1.3 → 0.1.4

Files changed (102)
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.travis.yml +11 -3
  4. data/Gemfile +2 -0
  5. data/ext/brotli/brotli.c +279 -0
  6. data/ext/brotli/brotli.h +2 -0
  7. data/ext/brotli/buffer.c +95 -0
  8. data/ext/brotli/buffer.h +19 -0
  9. data/ext/brotli/extconf.rb +21 -81
  10. data/lib/brotli/version.rb +1 -1
  11. data/vendor/brotli/dec/bit_reader.c +5 -5
  12. data/vendor/brotli/dec/bit_reader.h +15 -15
  13. data/vendor/brotli/dec/context.h +1 -1
  14. data/vendor/brotli/dec/decode.c +433 -348
  15. data/vendor/brotli/dec/decode.h +74 -48
  16. data/vendor/brotli/dec/huffman.c +5 -4
  17. data/vendor/brotli/dec/huffman.h +4 -4
  18. data/vendor/brotli/dec/port.h +2 -95
  19. data/vendor/brotli/dec/prefix.h +5 -3
  20. data/vendor/brotli/dec/state.c +15 -27
  21. data/vendor/brotli/dec/state.h +21 -17
  22. data/vendor/brotli/dec/transform.h +1 -1
  23. data/vendor/brotli/enc/backward_references.c +892 -0
  24. data/vendor/brotli/enc/backward_references.h +85 -102
  25. data/vendor/brotli/enc/backward_references_inc.h +147 -0
  26. data/vendor/brotli/enc/bit_cost.c +35 -0
  27. data/vendor/brotli/enc/bit_cost.h +23 -121
  28. data/vendor/brotli/enc/bit_cost_inc.h +127 -0
  29. data/vendor/brotli/enc/block_encoder_inc.h +33 -0
  30. data/vendor/brotli/enc/block_splitter.c +197 -0
  31. data/vendor/brotli/enc/block_splitter.h +40 -50
  32. data/vendor/brotli/enc/block_splitter_inc.h +432 -0
  33. data/vendor/brotli/enc/brotli_bit_stream.c +1334 -0
  34. data/vendor/brotli/enc/brotli_bit_stream.h +95 -167
  35. data/vendor/brotli/enc/cluster.c +56 -0
  36. data/vendor/brotli/enc/cluster.h +23 -305
  37. data/vendor/brotli/enc/cluster_inc.h +315 -0
  38. data/vendor/brotli/enc/command.h +83 -76
  39. data/vendor/brotli/enc/compress_fragment.c +747 -0
  40. data/vendor/brotli/enc/compress_fragment.h +48 -37
  41. data/vendor/brotli/enc/compress_fragment_two_pass.c +557 -0
  42. data/vendor/brotli/enc/compress_fragment_two_pass.h +37 -26
  43. data/vendor/brotli/enc/compressor.cc +139 -0
  44. data/vendor/brotli/enc/compressor.h +146 -0
  45. data/vendor/brotli/enc/context.h +102 -96
  46. data/vendor/brotli/enc/dictionary_hash.h +9 -5
  47. data/vendor/brotli/enc/encode.c +1562 -0
  48. data/vendor/brotli/enc/encode.h +211 -199
  49. data/vendor/brotli/enc/encode_parallel.cc +161 -151
  50. data/vendor/brotli/enc/encode_parallel.h +7 -8
  51. data/vendor/brotli/enc/entropy_encode.c +501 -0
  52. data/vendor/brotli/enc/entropy_encode.h +107 -89
  53. data/vendor/brotli/enc/entropy_encode_static.h +29 -62
  54. data/vendor/brotli/enc/fast_log.h +26 -20
  55. data/vendor/brotli/enc/find_match_length.h +23 -20
  56. data/vendor/brotli/enc/hash.h +614 -871
  57. data/vendor/brotli/enc/hash_forgetful_chain_inc.h +249 -0
  58. data/vendor/brotli/enc/hash_longest_match_inc.h +241 -0
  59. data/vendor/brotli/enc/hash_longest_match_quickly_inc.h +230 -0
  60. data/vendor/brotli/enc/histogram.c +95 -0
  61. data/vendor/brotli/enc/histogram.h +49 -83
  62. data/vendor/brotli/enc/histogram_inc.h +51 -0
  63. data/vendor/brotli/enc/literal_cost.c +178 -0
  64. data/vendor/brotli/enc/literal_cost.h +16 -10
  65. data/vendor/brotli/enc/memory.c +181 -0
  66. data/vendor/brotli/enc/memory.h +62 -0
  67. data/vendor/brotli/enc/metablock.c +515 -0
  68. data/vendor/brotli/enc/metablock.h +87 -57
  69. data/vendor/brotli/enc/metablock_inc.h +183 -0
  70. data/vendor/brotli/enc/port.h +73 -47
  71. data/vendor/brotli/enc/prefix.h +34 -61
  72. data/vendor/brotli/enc/quality.h +130 -0
  73. data/vendor/brotli/enc/ringbuffer.h +137 -122
  74. data/vendor/brotli/enc/{static_dict.cc → static_dict.c} +162 -139
  75. data/vendor/brotli/enc/static_dict.h +23 -18
  76. data/vendor/brotli/enc/static_dict_lut.h +11223 -12037
  77. data/vendor/brotli/enc/streams.cc +7 -7
  78. data/vendor/brotli/enc/streams.h +32 -32
  79. data/vendor/brotli/enc/{utf8_util.cc → utf8_util.c} +22 -20
  80. data/vendor/brotli/enc/utf8_util.h +16 -9
  81. data/vendor/brotli/enc/write_bits.h +49 -43
  82. metadata +34 -25
  83. data/ext/brotli/brotli.cc +0 -181
  84. data/vendor/brotli/dec/Makefile +0 -12
  85. data/vendor/brotli/dec/dictionary.c +0 -9466
  86. data/vendor/brotli/dec/dictionary.h +0 -38
  87. data/vendor/brotli/dec/types.h +0 -38
  88. data/vendor/brotli/enc/Makefile +0 -14
  89. data/vendor/brotli/enc/backward_references.cc +0 -858
  90. data/vendor/brotli/enc/block_splitter.cc +0 -505
  91. data/vendor/brotli/enc/brotli_bit_stream.cc +0 -1181
  92. data/vendor/brotli/enc/compress_fragment.cc +0 -701
  93. data/vendor/brotli/enc/compress_fragment_two_pass.cc +0 -524
  94. data/vendor/brotli/enc/dictionary.cc +0 -9466
  95. data/vendor/brotli/enc/dictionary.h +0 -41
  96. data/vendor/brotli/enc/encode.cc +0 -1180
  97. data/vendor/brotli/enc/entropy_encode.cc +0 -480
  98. data/vendor/brotli/enc/histogram.cc +0 -67
  99. data/vendor/brotli/enc/literal_cost.cc +0 -165
  100. data/vendor/brotli/enc/metablock.cc +0 -539
  101. data/vendor/brotli/enc/transform.h +0 -248
  102. data/vendor/brotli/enc/types.h +0 -29
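The list above tells the story of this release: the vendored brotli encoder was largely ported from C++ to C (`static_dict.cc → static_dict.c`, `utf8_util.cc → utf8_util.c`, new `.c` sources replacing the deleted `.cc` ones, with a few C++ shims such as `compressor.cc` kept), and C++ templates gave way to parameterized `_inc.h` headers that are re-included once per hasher. The excerpt that follows is from `data/vendor/brotli/enc/hash.h`, which defines the `CAT`/`EXPAND_CAT`/`FN` macros driving that pattern. Below is a minimal standalone sketch of the technique; the `BucketBits` function and its return values are invented for illustration and are not part of the gem:

```c
#include <stdio.h>

/* Same token-pasting macros as in the hash.h diff below. */
#define EXPAND_CAT(a, b) CAT(a, b)
#define CAT(a, b) a ## b
#define FN(X) EXPAND_CAT(X, HASHER())

/* First "instantiation": FN(BucketBits) expands to BucketBitsH2. */
#define HASHER() H2
static int FN(BucketBits)(void) { return 16; }  /* hypothetical value */
#undef HASHER

/* Second "instantiation": FN(BucketBits) expands to BucketBitsH3. */
#define HASHER() H3
static int FN(BucketBits)(void) { return 17; }  /* hypothetical value */
#undef HASHER

int main(void) {
  /* Each expansion produced a distinct symbol, like a template instantiation. */
  printf("H2: %d, H3: %d\n", BucketBitsH2(), BucketBitsH3());
  return 0;
}
```

Each re-inclusion with a different `HASHER()` stamps out a new family of functions, which is how `hash_longest_match_quickly_inc.h` at the bottom of the diff is included twice to generate the `H2` and `H3` hashers.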
@@ -4,30 +4,32 @@
    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
-// A (forgetful) hash table to the data seen by the compressor, to
-// help create backward references to previous data.
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data. */
 
 #ifndef BROTLI_ENC_HASH_H_
 #define BROTLI_ENC_HASH_H_
 
-#include <sys/types.h>
-#include <algorithm>
-#include <cstring>
-#include <limits>
+#include <string.h> /* memcmp, memset */
 
+#include "../common/constants.h"
+#include "../common/dictionary.h"
+#include "../common/types.h"
 #include "./dictionary_hash.h"
 #include "./fast_log.h"
 #include "./find_match_length.h"
+#include "./memory.h"
 #include "./port.h"
-#include "./prefix.h"
+#include "./quality.h"
 #include "./static_dict.h"
-#include "./transform.h"
-#include "./types.h"
 
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
 
-static const size_t kMaxTreeSearchDepth = 64;
-static const size_t kMaxTreeCompLength = 128;
+#define MAX_TREE_SEARCH_DEPTH 64
+#define MAX_TREE_COMP_LENGTH 128
+#define score_t size_t
 
 static const uint32_t kDistanceCacheIndex[] = {
   0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
@@ -41,934 +43,675 @@ static const uint8_t kCutoffTransforms[] = {
41
43
  0, 12, 27, 23, 42, 63, 56, 48, 59, 64
42
44
  };
43
45
 
44
- // kHashMul32 multiplier has these properties:
45
- // * The multiplier must be odd. Otherwise we may lose the highest bit.
46
- // * No long streaks of 1s or 0s.
47
- // * There is no effort to ensure that it is a prime, the oddity is enough
48
- // for this use.
49
- // * The number has been tuned heuristically against compression benchmarks.
46
+ typedef struct HasherSearchResult {
47
+ size_t len;
48
+ size_t len_x_code; /* == len ^ len_code */
49
+ size_t distance;
50
+ score_t score;
51
+ } HasherSearchResult;
52
+
53
+ typedef struct DictionarySearchStatictics {
54
+ size_t num_lookups;
55
+ size_t num_matches;
56
+ } DictionarySearchStatictics;
57
+
58
+ /* kHashMul32 multiplier has these properties:
59
+ * The multiplier must be odd. Otherwise we may lose the highest bit.
60
+ * No long streaks of 1s or 0s.
61
+ * There is no effort to ensure that it is a prime, the oddity is enough
62
+ for this use.
63
+ * The number has been tuned heuristically against compression benchmarks. */
50
64
  static const uint32_t kHashMul32 = 0x1e35a7bd;
51
65
 
52
- template<int kShiftBits>
53
- inline uint32_t Hash(const uint8_t *data) {
66
+ static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
54
67
  uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
55
- // The higher bits contain more mixture from the multiplication,
56
- // so we take our results from there.
57
- return h >> (32 - kShiftBits);
68
+ /* The higher bits contain more mixture from the multiplication,
69
+ so we take our results from there. */
70
+ return h >> (32 - 14);
58
71
  }
59
72
 
60
- // Usually, we always choose the longest backward reference. This function
61
- // allows for the exception of that rule.
62
- //
63
- // If we choose a backward reference that is further away, it will
64
- // usually be coded with more bits. We approximate this by assuming
65
- // log2(distance). If the distance can be expressed in terms of the
66
- // last four distances, we use some heuristic constants to estimate
67
- // the bits cost. For the first up to four literals we use the bit
68
- // cost of the literals from the literal cost model, after that we
69
- // use the average bit cost of the cost model.
70
- //
71
- // This function is used to sometimes discard a longer backward reference
72
- // when it is not much longer and the bit cost for encoding it is more
73
- // than the saved literals.
74
- //
75
- // backward_reference_offset MUST be positive.
76
- inline double BackwardReferenceScore(size_t copy_length,
77
- size_t backward_reference_offset) {
78
- return 5.4 * static_cast<double>(copy_length) -
79
- 1.20 * Log2FloorNonZero(backward_reference_offset);
73
+ #define BROTLI_LITERAL_BYTE_SCORE 540
74
+ #define BROTLI_DISTANCE_BIT_PENALTY 120
75
+ /* Score must be positive after applying maximal penalty. */
76
+ #define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))
77
+
78
+ /* Usually, we always choose the longest backward reference. This function
79
+ allows for the exception of that rule.
80
+
81
+ If we choose a backward reference that is further away, it will
82
+ usually be coded with more bits. We approximate this by assuming
83
+ log2(distance). If the distance can be expressed in terms of the
84
+ last four distances, we use some heuristic constants to estimate
85
+ the bits cost. For the first up to four literals we use the bit
86
+ cost of the literals from the literal cost model, after that we
87
+ use the average bit cost of the cost model.
88
+
89
+ This function is used to sometimes discard a longer backward reference
90
+ when it is not much longer and the bit cost for encoding it is more
91
+ than the saved literals.
92
+
93
+ backward_reference_offset MUST be positive. */
94
+ static BROTLI_INLINE score_t BackwardReferenceScore(
95
+ size_t copy_length, size_t backward_reference_offset) {
96
+ return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length -
97
+ BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);
80
98
  }
81
99
 
82
- inline double BackwardReferenceScoreUsingLastDistance(size_t copy_length,
83
- size_t distance_short_code) {
84
- static const double kDistanceShortCodeBitCost[16] = {
85
- -0.6, 0.95, 1.17, 1.27,
86
- 0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
87
- 1.05, 1.05, 1.15, 1.15, 1.25, 1.25
88
- };
89
- return 5.4 * static_cast<double>(copy_length) -
90
- kDistanceShortCodeBitCost[distance_short_code];
91
- }
92
-
93
- struct BackwardMatch {
94
- BackwardMatch(void) : distance(0), length_and_code(0) {}
95
-
96
- BackwardMatch(size_t dist, size_t len)
97
- : distance(static_cast<uint32_t>(dist))
98
- , length_and_code(static_cast<uint32_t>(len << 5)) {}
99
-
100
- BackwardMatch(size_t dist, size_t len, size_t len_code)
101
- : distance(static_cast<uint32_t>(dist))
102
- , length_and_code(static_cast<uint32_t>(
103
- (len << 5) | (len == len_code ? 0 : len_code))) {}
104
-
105
- size_t length(void) const {
106
- return length_and_code >> 5;
107
- }
108
- size_t length_code(void) const {
109
- size_t code = length_and_code & 31;
110
- return code ? code : length();
111
- }
112
-
113
- uint32_t distance;
114
- uint32_t length_and_code;
100
+ static const score_t kDistanceShortCodeCost[BROTLI_NUM_DISTANCE_SHORT_CODES] = {
101
+ /* Repeat last */
102
+ BROTLI_SCORE_BASE + 60,
103
+ /* 2nd, 3rd, 4th last */
104
+ BROTLI_SCORE_BASE - 95,
105
+ BROTLI_SCORE_BASE - 117,
106
+ BROTLI_SCORE_BASE - 127,
107
+ /* Last with offset */
108
+ BROTLI_SCORE_BASE - 93,
109
+ BROTLI_SCORE_BASE - 93,
110
+ BROTLI_SCORE_BASE - 96,
111
+ BROTLI_SCORE_BASE - 96,
112
+ BROTLI_SCORE_BASE - 99,
113
+ BROTLI_SCORE_BASE - 99,
114
+ /* 2nd last with offset */
115
+ BROTLI_SCORE_BASE - 105,
116
+ BROTLI_SCORE_BASE - 105,
117
+ BROTLI_SCORE_BASE - 115,
118
+ BROTLI_SCORE_BASE - 115,
119
+ BROTLI_SCORE_BASE - 125,
120
+ BROTLI_SCORE_BASE - 125
115
121
  };
116
122
 
117
- // A (forgetful) hash table to the data seen by the compressor, to
118
- // help create backward references to previous data.
119
- //
120
- // This is a hash map of fixed size (kBucketSize). Starting from the
121
- // given index, kBucketSweep buckets are used to store values of a key.
122
- template <int kBucketBits, int kBucketSweep, bool kUseDictionary>
123
- class HashLongestMatchQuickly {
124
- public:
125
- HashLongestMatchQuickly(void) {
126
- Reset();
127
- }
128
- void Reset(void) {
129
- need_init_ = true;
130
- num_dict_lookups_ = 0;
131
- num_dict_matches_ = 0;
132
- }
133
- void Init(void) {
134
- if (need_init_) {
135
- // It is not strictly necessary to fill this buffer here, but
136
- // not filling will make the results of the compression stochastic
137
- // (but correct). This is because random data would cause the
138
- // system to find accidentally good backward references here and there.
139
- memset(&buckets_[0], 0, sizeof(buckets_));
140
- need_init_ = false;
141
- }
142
- }
143
- void InitForData(const uint8_t* data, size_t num) {
144
- for (size_t i = 0; i < num; ++i) {
145
- const uint32_t key = HashBytes(&data[i]);
146
- memset(&buckets_[key], 0, kBucketSweep * sizeof(buckets_[0]));
147
- need_init_ = false;
148
- }
149
- }
150
- // Look at 4 bytes at data.
151
- // Compute a hash from these, and store the value somewhere within
152
- // [ix .. ix+3].
153
- inline void Store(const uint8_t *data, const uint32_t ix) {
154
- const uint32_t key = HashBytes(data);
155
- // Wiggle the value with the bucket sweep range.
156
- const uint32_t off = (ix >> 3) % kBucketSweep;
157
- buckets_[key + off] = ix;
158
- }
159
-
160
- // Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
161
- // up to the length of max_length and stores the position cur_ix in the
162
- // hash table.
163
- //
164
- // Does not look for matches longer than max_length.
165
- // Does not look for matches further away than max_backward.
166
- // Writes the best found match length into best_len_out.
167
- // Writes the index (&data[index]) of the start of the best match into
168
- // best_distance_out.
169
- inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer,
170
- const size_t ring_buffer_mask,
171
- const int* __restrict distance_cache,
172
- const size_t cur_ix,
173
- const size_t max_length,
174
- const size_t max_backward,
175
- size_t * __restrict best_len_out,
176
- size_t * __restrict best_len_code_out,
177
- size_t * __restrict best_distance_out,
178
- double* __restrict best_score_out) {
179
- const size_t best_len_in = *best_len_out;
180
- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
181
- const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]);
182
- int compare_char = ring_buffer[cur_ix_masked + best_len_in];
183
- double best_score = *best_score_out;
184
- size_t best_len = best_len_in;
185
- size_t cached_backward = static_cast<size_t>(distance_cache[0]);
186
- size_t prev_ix = cur_ix - cached_backward;
187
- bool match_found = false;
188
- if (prev_ix < cur_ix) {
189
- prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
190
- if (compare_char == ring_buffer[prev_ix + best_len]) {
191
- size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
192
- &ring_buffer[cur_ix_masked],
193
- max_length);
194
- if (len >= 4) {
195
- best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
196
- best_len = len;
197
- *best_len_out = len;
198
- *best_len_code_out = len;
199
- *best_distance_out = cached_backward;
200
- *best_score_out = best_score;
201
- compare_char = ring_buffer[cur_ix_masked + best_len];
202
- if (kBucketSweep == 1) {
203
- buckets_[key] = static_cast<uint32_t>(cur_ix);
204
- return true;
205
- } else {
206
- match_found = true;
207
- }
208
- }
209
- }
210
- }
211
- if (kBucketSweep == 1) {
212
- // Only one to look for, don't bother to prepare for a loop.
213
- prev_ix = buckets_[key];
214
- buckets_[key] = static_cast<uint32_t>(cur_ix);
215
- size_t backward = cur_ix - prev_ix;
216
- prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
217
- if (compare_char != ring_buffer[prev_ix + best_len_in]) {
218
- return false;
219
- }
220
- if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
221
- return false;
222
- }
223
- const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
224
- &ring_buffer[cur_ix_masked],
225
- max_length);
226
- if (len >= 4) {
227
- *best_len_out = len;
228
- *best_len_code_out = len;
229
- *best_distance_out = backward;
230
- *best_score_out = BackwardReferenceScore(len, backward);
231
- return true;
232
- }
233
- } else {
234
- uint32_t *bucket = buckets_ + key;
235
- prev_ix = *bucket++;
236
- for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {
237
- const size_t backward = cur_ix - prev_ix;
238
- prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
239
- if (compare_char != ring_buffer[prev_ix + best_len]) {
240
- continue;
241
- }
242
- if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
243
- continue;
244
- }
245
- const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
246
- &ring_buffer[cur_ix_masked],
247
- max_length);
248
- if (len >= 4) {
249
- const double score = BackwardReferenceScore(len, backward);
250
- if (best_score < score) {
251
- best_score = score;
252
- best_len = len;
253
- *best_len_out = best_len;
254
- *best_len_code_out = best_len;
255
- *best_distance_out = backward;
256
- *best_score_out = score;
257
- compare_char = ring_buffer[cur_ix_masked + best_len];
258
- match_found = true;
259
- }
260
- }
261
- }
262
- }
263
- if (kUseDictionary && !match_found &&
264
- num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
265
- ++num_dict_lookups_;
266
- const uint32_t dict_key = Hash<14>(&ring_buffer[cur_ix_masked]) << 1;
267
- const uint16_t v = kStaticDictionaryHash[dict_key];
268
- if (v > 0) {
269
- const uint32_t len = v & 31;
270
- const uint32_t dist = v >> 5;
271
- const size_t offset =
272
- kBrotliDictionaryOffsetsByLength[len] + len * dist;
273
- if (len <= max_length) {
274
- const size_t matchlen =
275
- FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
276
- &kBrotliDictionary[offset], len);
277
- if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
278
- const size_t transform_id = kCutoffTransforms[len - matchlen];
279
- const size_t word_id =
280
- transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]) +
281
- dist;
282
- const size_t backward = max_backward + word_id + 1;
283
- const double score = BackwardReferenceScore(matchlen, backward);
284
- if (best_score < score) {
285
- ++num_dict_matches_;
286
- best_score = score;
287
- best_len = matchlen;
288
- *best_len_out = best_len;
289
- *best_len_code_out = len;
290
- *best_distance_out = backward;
291
- *best_score_out = best_score;
292
- match_found = true;
293
- }
294
- }
295
- }
296
- }
297
- }
298
- const uint32_t off = (cur_ix >> 3) % kBucketSweep;
299
- buckets_[key + off] = static_cast<uint32_t>(cur_ix);
300
- return match_found;
301
- }
302
-
303
- enum { kHashLength = 5 };
304
- enum { kHashTypeLength = 8 };
305
- // HashBytes is the function that chooses the bucket to place
306
- // the address in. The HashLongestMatch and HashLongestMatchQuickly
307
- // classes have separate, different implementations of hashing.
308
- static uint32_t HashBytes(const uint8_t *data) {
309
- // Computing a hash based on 5 bytes works much better for
310
- // qualities 1 and 3, where the next hash value is likely to replace
311
- uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32;
312
- // The higher bits contain more mixture from the multiplication,
313
- // so we take our results from there.
314
- return static_cast<uint32_t>(h >> (64 - kBucketBits));
315
- }
316
-
317
- enum { kHashMapSize = 4 << kBucketBits };
318
-
319
- private:
320
- static const uint32_t kBucketSize = 1 << kBucketBits;
321
- uint32_t buckets_[kBucketSize + kBucketSweep];
322
- // True if buckets_ array needs to be initialized.
323
- bool need_init_;
324
- size_t num_dict_lookups_;
325
- size_t num_dict_matches_;
326
- };
123
+ static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(
124
+ size_t copy_length, size_t distance_short_code) {
125
+ return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length +
126
+ kDistanceShortCodeCost[distance_short_code];
127
+ }
327
128
 
328
- // A (forgetful) hash table to the data seen by the compressor, to
329
- // help create backward references to previous data.
330
- //
331
- // This is a hash map of fixed size (kBucketSize) to a ring buffer of
332
- // fixed size (kBlockSize). The ring buffer contains the last kBlockSize
333
- // index positions of the given hash key in the compressed data.
334
- template <int kBucketBits,
335
- int kBlockBits,
336
- int kNumLastDistancesToCheck>
337
- class HashLongestMatch {
338
- public:
339
- HashLongestMatch(void) {
340
- Reset();
341
- }
129
+ static BROTLI_INLINE void DictionarySearchStaticticsReset(
130
+ DictionarySearchStatictics* self) {
131
+ self->num_lookups = 0;
132
+ self->num_matches = 0;
133
+ }
342
134
 
343
- void Reset(void) {
344
- need_init_ = true;
345
- num_dict_lookups_ = 0;
346
- num_dict_matches_ = 0;
347
- }
135
+ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
136
+ size_t item, const uint8_t* data, size_t max_length, size_t max_backward,
137
+ HasherSearchResult* out) {
138
+ size_t len;
139
+ size_t dist;
140
+ size_t offset;
141
+ size_t matchlen;
142
+ size_t backward;
143
+ score_t score;
144
+ len = item & 31;
145
+ dist = item >> 5;
146
+ offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
147
+ if (len > max_length) {
148
+ return BROTLI_FALSE;
149
+ }
150
+
151
+ matchlen = FindMatchLengthWithLimit(data, &kBrotliDictionary[offset], len);
152
+ if (matchlen + kCutoffTransformsCount <= len || matchlen == 0) {
153
+ return BROTLI_FALSE;
154
+ }
155
+ {
156
+ size_t transform_id = kCutoffTransforms[len - matchlen];
157
+ backward = max_backward + dist + 1 +
158
+ (transform_id << kBrotliDictionarySizeBitsByLength[len]);
159
+ }
160
+ score = BackwardReferenceScore(matchlen, backward);
161
+ if (score < out->score) {
162
+ return BROTLI_FALSE;
163
+ }
164
+ out->len = matchlen;
165
+ out->len_x_code = len ^ matchlen;
166
+ out->distance = backward;
167
+ out->score = score;
168
+ return BROTLI_TRUE;
169
+ }
348
170
 
349
- void Init(void) {
350
- if (need_init_) {
351
- memset(&num_[0], 0, sizeof(num_));
352
- need_init_ = false;
171
+ static BROTLI_INLINE BROTLI_BOOL SearchInStaticDictionary(
172
+ DictionarySearchStatictics* self, const uint8_t* data, size_t max_length,
173
+ size_t max_backward, HasherSearchResult* out, BROTLI_BOOL shallow) {
174
+ size_t key;
175
+ size_t i;
176
+ BROTLI_BOOL is_match_found = BROTLI_FALSE;
177
+ if (self->num_matches < (self->num_lookups >> 7)) {
178
+ return BROTLI_FALSE;
179
+ }
180
+ key = Hash14(data) << 1;
181
+ for (i = 0; i < (shallow ? 1 : 2); ++i, ++key) {
182
+ size_t item = kStaticDictionaryHash[key];
183
+ self->num_lookups++;
184
+ if (item != 0 &&
185
+ TestStaticDictionaryItem(item, data, max_length, max_backward, out)) {
186
+ self->num_matches++;
187
+ is_match_found = BROTLI_TRUE;
353
188
  }
354
189
  }
190
+ return is_match_found;
191
+ }
355
192
 
356
- void InitForData(const uint8_t* data, size_t num) {
357
- for (size_t i = 0; i < num; ++i) {
358
- const uint32_t key = HashBytes(&data[i]);
359
- num_[key] = 0;
360
- need_init_ = false;
361
- }
362
- }
193
+ typedef struct BackwardMatch {
194
+ uint32_t distance;
195
+ uint32_t length_and_code;
196
+ } BackwardMatch;
363
197
 
364
- // Look at 3 bytes at data.
365
- // Compute a hash from these, and store the value of ix at that position.
366
- inline void Store(const uint8_t *data, const uint32_t ix) {
367
- const uint32_t key = HashBytes(data);
368
- const int minor_ix = num_[key] & kBlockMask;
369
- buckets_[key][minor_ix] = ix;
370
- ++num_[key];
371
- }
198
+ static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,
199
+ size_t dist, size_t len) {
200
+ self->distance = (uint32_t)dist;
201
+ self->length_and_code = (uint32_t)(len << 5);
202
+ }
372
203
 
373
- // Find a longest backward match of &data[cur_ix] up to the length of
374
- // max_length and stores the position cur_ix in the hash table.
375
- //
376
- // Does not look for matches longer than max_length.
377
- // Does not look for matches further away than max_backward.
378
- // Writes the best found match length into best_len_out.
379
- // Writes the index (&data[index]) offset from the start of the best match
380
- // into best_distance_out.
381
- // Write the score of the best match into best_score_out.
382
- bool FindLongestMatch(const uint8_t * __restrict data,
383
- const size_t ring_buffer_mask,
384
- const int* __restrict distance_cache,
385
- const size_t cur_ix,
386
- const size_t max_length,
387
- const size_t max_backward,
388
- size_t * __restrict best_len_out,
389
- size_t * __restrict best_len_code_out,
390
- size_t * __restrict best_distance_out,
391
- double * __restrict best_score_out) {
392
- *best_len_code_out = 0;
393
- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
394
- bool match_found = false;
395
- // Don't accept a short copy from far away.
396
- double best_score = *best_score_out;
397
- size_t best_len = *best_len_out;
398
- *best_len_out = 0;
399
- // Try last distance first.
400
- for (size_t i = 0; i < kNumLastDistancesToCheck; ++i) {
401
- const size_t idx = kDistanceCacheIndex[i];
402
- const size_t backward =
403
- static_cast<size_t>(distance_cache[idx] + kDistanceCacheOffset[i]);
404
- size_t prev_ix = static_cast<size_t>(cur_ix - backward);
405
- if (prev_ix >= cur_ix) {
406
- continue;
407
- }
408
- if (PREDICT_FALSE(backward > max_backward)) {
409
- continue;
410
- }
411
- prev_ix &= ring_buffer_mask;
204
+ static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,
205
+ size_t dist, size_t len, size_t len_code) {
206
+ self->distance = (uint32_t)dist;
207
+ self->length_and_code =
208
+ (uint32_t)((len << 5) | (len == len_code ? 0 : len_code));
209
+ }
412
210
 
413
- if (cur_ix_masked + best_len > ring_buffer_mask ||
414
- prev_ix + best_len > ring_buffer_mask ||
415
- data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
416
- continue;
417
- }
418
- const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
419
- &data[cur_ix_masked],
420
- max_length);
421
- if (len >= 3 || (len == 2 && i < 2)) {
422
- // Comparing for >= 2 does not change the semantics, but just saves for
423
- // a few unnecessary binary logarithms in backward reference score,
424
- // since we are not interested in such short matches.
425
- double score = BackwardReferenceScoreUsingLastDistance(len, i);
426
- if (best_score < score) {
427
- best_score = score;
428
- best_len = len;
429
- *best_len_out = best_len;
430
- *best_len_code_out = best_len;
431
- *best_distance_out = backward;
432
- *best_score_out = best_score;
433
- match_found = true;
434
- }
435
- }
436
- }
437
- const uint32_t key = HashBytes(&data[cur_ix_masked]);
438
- const uint32_t * __restrict const bucket = &buckets_[key][0];
439
- const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
440
- for (size_t i = num_[key]; i > down;) {
441
- --i;
442
- size_t prev_ix = bucket[i & kBlockMask];
443
- const size_t backward = cur_ix - prev_ix;
444
- if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
445
- break;
446
- }
447
- prev_ix &= ring_buffer_mask;
448
- if (cur_ix_masked + best_len > ring_buffer_mask ||
449
- prev_ix + best_len > ring_buffer_mask ||
450
- data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
451
- continue;
452
- }
453
- const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
454
- &data[cur_ix_masked],
455
- max_length);
456
- if (len >= 4) {
457
- // Comparing for >= 3 does not change the semantics, but just saves
458
- // for a few unnecessary binary logarithms in backward reference
459
- // score, since we are not interested in such short matches.
460
- double score = BackwardReferenceScore(len, backward);
461
- if (best_score < score) {
462
- best_score = score;
463
- best_len = len;
464
- *best_len_out = best_len;
465
- *best_len_code_out = best_len;
466
- *best_distance_out = backward;
467
- *best_score_out = best_score;
468
- match_found = true;
469
- }
470
- }
471
- }
472
- buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
473
- ++num_[key];
474
- if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
475
- size_t dict_key = Hash<14>(&data[cur_ix_masked]) << 1;
476
- for (int k = 0; k < 2; ++k, ++dict_key) {
477
- ++num_dict_lookups_;
478
- const uint16_t v = kStaticDictionaryHash[dict_key];
479
- if (v > 0) {
480
- const size_t len = v & 31;
481
- const size_t dist = v >> 5;
482
- const size_t offset =
483
- kBrotliDictionaryOffsetsByLength[len] + len * dist;
484
- if (len <= max_length) {
485
- const size_t matchlen =
486
- FindMatchLengthWithLimit(&data[cur_ix_masked],
487
- &kBrotliDictionary[offset], len);
488
- if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
489
- const size_t transform_id = kCutoffTransforms[len - matchlen];
490
- const size_t word_id =
491
- transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
492
- dist;
493
- const size_t backward = max_backward + word_id + 1;
494
- double score = BackwardReferenceScore(matchlen, backward);
495
- if (best_score < score) {
496
- ++num_dict_matches_;
497
- best_score = score;
498
- best_len = matchlen;
499
- *best_len_out = best_len;
500
- *best_len_code_out = len;
501
- *best_distance_out = backward;
502
- *best_score_out = best_score;
503
- match_found = true;
504
- }
505
- }
506
- }
507
- }
508
- }
509
- }
510
- return match_found;
511
- }
211
+ static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {
212
+ return self->length_and_code >> 5;
213
+ }
512
214
 
513
- // Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
514
- // length of max_length and stores the position cur_ix in the hash table.
515
- //
516
- // Sets *num_matches to the number of matches found, and stores the found
517
- // matches in matches[0] to matches[*num_matches - 1]. The matches will be
518
- // sorted by strictly increasing length and (non-strictly) increasing
519
- // distance.
520
- size_t FindAllMatches(const uint8_t* data,
521
- const size_t ring_buffer_mask,
522
- const size_t cur_ix,
523
- const size_t max_length,
524
- const size_t max_backward,
525
- BackwardMatch* matches) {
526
- BackwardMatch* const orig_matches = matches;
527
- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
528
- size_t best_len = 1;
529
- size_t stop = cur_ix - 64;
530
- if (cur_ix < 64) { stop = 0; }
531
- for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {
532
- size_t prev_ix = i;
533
- const size_t backward = cur_ix - prev_ix;
534
- if (PREDICT_FALSE(backward > max_backward)) {
535
- break;
536
- }
537
- prev_ix &= ring_buffer_mask;
538
- if (data[cur_ix_masked] != data[prev_ix] ||
539
- data[cur_ix_masked + 1] != data[prev_ix + 1]) {
540
- continue;
541
- }
542
- const size_t len =
543
- FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
544
- max_length);
545
- if (len > best_len) {
546
- best_len = len;
547
- *matches++ = BackwardMatch(backward, len);
548
- }
549
- }
550
- const uint32_t key = HashBytes(&data[cur_ix_masked]);
551
- const uint32_t * __restrict const bucket = &buckets_[key][0];
552
- const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
553
- for (size_t i = num_[key]; i > down;) {
554
- --i;
555
- size_t prev_ix = bucket[i & kBlockMask];
556
- const size_t backward = cur_ix - prev_ix;
557
- if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
558
- break;
559
- }
560
- prev_ix &= ring_buffer_mask;
561
- if (cur_ix_masked + best_len > ring_buffer_mask ||
562
- prev_ix + best_len > ring_buffer_mask ||
563
- data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
564
- continue;
565
- }
566
- const size_t len =
567
- FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
568
- max_length);
569
- if (len > best_len) {
570
- best_len = len;
571
- *matches++ = BackwardMatch(backward, len);
572
- }
573
- }
574
- buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
575
- ++num_[key];
576
- uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
577
- for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
578
- dict_matches[i] = kInvalidMatch;
579
- }
580
- size_t minlen = std::max<size_t>(4, best_len + 1);
581
- if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
582
- &dict_matches[0])) {
583
- size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);
584
- for (size_t l = minlen; l <= maxlen; ++l) {
585
- uint32_t dict_id = dict_matches[l];
586
- if (dict_id < kInvalidMatch) {
587
- *matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
588
- dict_id & 31);
589
- }
590
- }
591
- }
592
- return static_cast<size_t>(matches - orig_matches);
593
- }
215
+ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
216
+ size_t code = self->length_and_code & 31;
217
+ return code ? code : BackwardMatchLength(self);
218
+ }
594
219
 
595
- enum { kHashLength = 4 };
596
- enum { kHashTypeLength = 4 };
597
-
598
- // HashBytes is the function that chooses the bucket to place
599
- // the address in. The HashLongestMatch and HashLongestMatchQuickly
600
- // classes have separate, different implementations of hashing.
601
- static uint32_t HashBytes(const uint8_t *data) {
602
- uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
603
- // The higher bits contain more mixture from the multiplication,
604
- // so we take our results from there.
605
- return h >> (32 - kBucketBits);
606
- }
220
+ #define EXPAND_CAT(a, b) CAT(a, b)
221
+ #define CAT(a, b) a ## b
222
+ #define FN(X) EXPAND_CAT(X, HASHER())
607
223
 
608
- enum { kHashMapSize = 2 << kBucketBits };
224
+ #define MAX_NUM_MATCHES_H10 (64 + MAX_TREE_SEARCH_DEPTH)
609
225
 
610
- static const size_t kMaxNumMatches = 64 + (1 << kBlockBits);
226
+ #define HASHER() H10
227
+ #define HashToBinaryTree HASHER()
611
228
 
612
- private:
613
- // Number of hash buckets.
614
- static const uint32_t kBucketSize = 1 << kBucketBits;
229
+ #define BUCKET_BITS 17
230
+ #define BUCKET_SIZE (1 << BUCKET_BITS)
615
231
 
616
- // Only kBlockSize newest backward references are kept,
617
- // and the older are forgotten.
618
- static const uint32_t kBlockSize = 1 << kBlockBits;
232
+ static size_t FN(HashTypeLength)(void) { return 4; }
233
+ static size_t FN(StoreLookahead)(void) { return MAX_TREE_COMP_LENGTH; }
619
234
 
620
- // Mask for accessing entries in a block (in a ringbuffer manner).
621
- static const uint32_t kBlockMask = (1 << kBlockBits) - 1;
235
+ static uint32_t FN(HashBytes)(const uint8_t *data) {
236
+ uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
237
+ /* The higher bits contain more mixture from the multiplication,
238
+ so we take our results from there. */
239
+ return h >> (32 - BUCKET_BITS);
240
+ }
622
241
 
623
- // Number of entries in a particular bucket.
624
- uint16_t num_[kBucketSize];
242
+ /* A (forgetful) hash table where each hash bucket contains a binary tree of
243
+ sequences whose first 4 bytes share the same hash code.
244
+ Each sequence is MAX_TREE_COMP_LENGTH long and is identified by its starting
245
+ position in the input data. The binary tree is sorted by the lexicographic
246
+ order of the sequences, and it is also a max-heap with respect to the
247
+ starting positions. */
248
+ typedef struct HashToBinaryTree {
249
+ /* The window size minus 1 */
250
+ size_t window_mask_;
625
251
 
626
- // Buckets containing kBlockSize of backward references.
627
- uint32_t buckets_[kBucketSize][kBlockSize];
252
+ /* Hash table that maps the 4-byte hashes of the sequence to the last
253
+ position where this hash was found, which is the root of the binary
254
+ tree of sequences that share this hash bucket. */
255
+ uint32_t buckets_[BUCKET_SIZE];
628
256
 
629
- // True if num_ array needs to be initialized.
630
- bool need_init_;
257
+ /* The union of the binary trees of each hash bucket. The root of the tree
258
+ corresponding to a hash is a sequence starting at buckets_[hash] and
259
+ the left and right children of a sequence starting at pos are
260
+ forest_[2 * pos] and forest_[2 * pos + 1]. */
261
+ uint32_t* forest_;
631
262
 
632
- size_t num_dict_lookups_;
633
- size_t num_dict_matches_;
634
- };
263
+ /* A position used to mark a non-existent sequence, i.e. a tree is empty if
264
+ its root is at invalid_pos_ and a node is a leaf if both its children
265
+ are at invalid_pos_. */
266
+ uint32_t invalid_pos_;
635
267
 
636
- // A (forgetful) hash table where each hash bucket contains a binary tree of
637
- // sequences whose first 4 bytes share the same hash code.
638
- // Each sequence is kMaxTreeCompLength long and is identified by its starting
639
- // position in the input data. The binary tree is sorted by the lexicographic
640
- // order of the sequences, and it is also a max-heap with respect to the
641
- // starting positions.
642
- class HashToBinaryTree {
643
- public:
644
- HashToBinaryTree() : forest_(NULL) {
645
- Reset();
646
- }
268
+ size_t forest_size_;
269
+ BROTLI_BOOL is_dirty_;
270
+ } HashToBinaryTree;
647
271
 
648
- ~HashToBinaryTree() {
649
- delete[] forest_;
650
- }
272
+ static void FN(Reset)(HashToBinaryTree* self) {
273
+ self->is_dirty_ = BROTLI_TRUE;
274
+ }
651
275
 
652
- void Reset() {
653
- need_init_ = true;
654
- }
276
+ static void FN(Initialize)(HashToBinaryTree* self) {
277
+ self->forest_ = NULL;
278
+ self->forest_size_ = 0;
279
+ FN(Reset)(self);
280
+ }
655
281
 
656
- void Init(int lgwin, size_t position, size_t bytes, bool is_last) {
657
- if (need_init_) {
658
- window_mask_ = (1u << lgwin) - 1u;
659
- invalid_pos_ = static_cast<uint32_t>(-window_mask_);
660
- for (uint32_t i = 0; i < kBucketSize; i++) {
661
- buckets_[i] = invalid_pos_;
662
- }
663
- size_t num_nodes = (position == 0 && is_last) ? bytes : window_mask_ + 1;
664
- forest_ = new uint32_t[2 * num_nodes];
665
- need_init_ = false;
666
- }
667
- }
282
+ static void FN(Cleanup)(MemoryManager* m, HashToBinaryTree* self) {
283
+ BROTLI_FREE(m, self->forest_);
284
+ }
668
285
 
669
- // Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
670
- // length of max_length and stores the position cur_ix in the hash table.
671
- //
672
- // Sets *num_matches to the number of matches found, and stores the found
673
- // matches in matches[0] to matches[*num_matches - 1]. The matches will be
674
- // sorted by strictly increasing length and (non-strictly) increasing
675
- // distance.
676
- size_t FindAllMatches(const uint8_t* data,
677
- const size_t ring_buffer_mask,
678
- const size_t cur_ix,
679
- const size_t max_length,
680
- const size_t max_backward,
681
- BackwardMatch* matches) {
682
- BackwardMatch* const orig_matches = matches;
683
- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
684
- size_t best_len = 1;
685
- size_t stop = cur_ix - 64;
686
- if (cur_ix < 64) { stop = 0; }
687
- for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {
688
- size_t prev_ix = i;
689
- const size_t backward = cur_ix - prev_ix;
690
- if (PREDICT_FALSE(backward > max_backward)) {
691
- break;
692
- }
693
- prev_ix &= ring_buffer_mask;
694
- if (data[cur_ix_masked] != data[prev_ix] ||
695
- data[cur_ix_masked + 1] != data[prev_ix + 1]) {
696
- continue;
697
- }
698
- const size_t len =
699
- FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
700
- max_length);
701
- if (len > best_len) {
702
- best_len = len;
703
- *matches++ = BackwardMatch(backward, len);
704
- }
705
- }
706
- if (best_len < max_length) {
707
- matches = StoreAndFindMatches(data, cur_ix, ring_buffer_mask,
708
- max_length, &best_len, matches);
709
- }
710
- uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
711
- for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
712
- dict_matches[i] = kInvalidMatch;
286
+ static void FN(Init)(
287
+ MemoryManager* m, HashToBinaryTree* self, const uint8_t* data,
288
+ const BrotliEncoderParams* params, size_t position, size_t bytes,
289
+ BROTLI_BOOL is_last) {
290
+ if (self->is_dirty_) {
291
+ uint32_t invalid_pos;
292
+ size_t num_nodes;
293
+ uint32_t i;
294
+ BROTLI_UNUSED(data);
295
+ self->window_mask_ = (1u << params->lgwin) - 1u;
296
+ invalid_pos = (uint32_t)(0 - self->window_mask_);
297
+ self->invalid_pos_ = invalid_pos;
298
+ for (i = 0; i < BUCKET_SIZE; i++) {
299
+ self->buckets_[i] = invalid_pos;
713
300
  }
714
- size_t minlen = std::max<size_t>(4, best_len + 1);
715
- if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
716
- &dict_matches[0])) {
717
- size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);
718
- for (size_t l = minlen; l <= maxlen; ++l) {
719
- uint32_t dict_id = dict_matches[l];
720
- if (dict_id < kInvalidMatch) {
721
- *matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
722
- dict_id & 31);
723
- }
724
- }
301
+ num_nodes = (position == 0 && is_last) ? bytes : self->window_mask_ + 1;
302
+ if (num_nodes > self->forest_size_) {
303
+ BROTLI_FREE(m, self->forest_);
304
+ self->forest_ = BROTLI_ALLOC(m, uint32_t, 2 * num_nodes);
305
+ if (BROTLI_IS_OOM(m)) return;
306
+ self->forest_size_ = num_nodes;
725
307
  }
726
- return static_cast<size_t>(matches - orig_matches);
308
+ self->is_dirty_ = BROTLI_FALSE;
727
309
  }
310
+ }
728
311
 
729
- // Stores the hash of the next 4 bytes and re-roots the binary tree at the
730
- // current sequence, without returning any matches.
731
- // REQUIRES: cur_ix + kMaxTreeCompLength <= end-of-current-block
732
- void Store(const uint8_t* data,
733
- const size_t ring_buffer_mask,
734
- const size_t cur_ix) {
735
- size_t best_len = 0;
736
- StoreAndFindMatches(data, cur_ix, ring_buffer_mask, kMaxTreeCompLength,
737
- &best_len, NULL);
738
- }
312
+ static BROTLI_INLINE size_t FN(LeftChildIndex)(HashToBinaryTree* self,
313
+ const size_t pos) {
314
+ return 2 * (pos & self->window_mask_);
315
+ }
739
316
 
740
- void StitchToPreviousBlock(size_t num_bytes,
741
- size_t position,
742
- const uint8_t* ringbuffer,
743
- size_t ringbuffer_mask) {
744
- if (num_bytes >= 3 && position >= kMaxTreeCompLength) {
745
- // Store the last `kMaxTreeCompLength - 1` positions in the hasher.
746
- // These could not be calculated before, since they require knowledge
747
- // of both the previous and the current block.
748
- const size_t i_start = position - kMaxTreeCompLength + 1;
749
- const size_t i_end = std::min(position, i_start + num_bytes);
750
- for (size_t i = i_start; i < i_end; ++i) {
751
- // We know that i + kMaxTreeCompLength <= position + num_bytes, i.e. the
752
- // end of the current block and that we have at least
753
- // kMaxTreeCompLength tail in the ringbuffer.
754
- Store(ringbuffer, ringbuffer_mask, i);
755
- }
756
- }
757
- }
317
+ static BROTLI_INLINE size_t FN(RightChildIndex)(HashToBinaryTree* self,
318
+ const size_t pos) {
319
+ return 2 * (pos & self->window_mask_) + 1;
320
+ }
758
321
 
759
- static const size_t kMaxNumMatches = 64 + kMaxTreeSearchDepth;
760
-
761
- private:
762
- // Stores the hash of the next 4 bytes and in a single tree-traversal, the
763
- // hash bucket's binary tree is searched for matches and is re-rooted at the
764
- // current position.
765
- //
766
- // If less than kMaxTreeCompLength data is available, the hash bucket of the
767
- // current position is searched for matches, but the state of the hash table
768
- // is not changed, since we can not know the final sorting order of the
769
- // current (incomplete) sequence.
770
- //
771
- // This function must be called with increasing cur_ix positions.
772
- BackwardMatch* StoreAndFindMatches(const uint8_t* const __restrict data,
773
- const size_t cur_ix,
774
- const size_t ring_buffer_mask,
775
- const size_t max_length,
776
- size_t* const __restrict best_len,
777
- BackwardMatch* __restrict matches) {
778
- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
779
- const size_t max_backward = window_mask_ - 15;
780
- const size_t max_comp_len = std::min(max_length, kMaxTreeCompLength);
781
- const bool reroot_tree = max_length >= kMaxTreeCompLength;
782
- const uint32_t key = HashBytes(&data[cur_ix_masked]);
783
- size_t prev_ix = buckets_[key];
784
- // The forest index of the rightmost node of the left subtree of the new
785
- // root, updated as we traverse and reroot the tree of the hash bucket.
786
- size_t node_left = LeftChildIndex(cur_ix);
787
- // The forest index of the leftmost node of the right subtree of the new
788
- // root, updated as we traverse and reroot the tree of the hash bucket.
789
- size_t node_right = RightChildIndex(cur_ix);
790
- // The match length of the rightmost node of the left subtree of the new
791
- // root, updated as we traverse and reroot the tree of the hash bucket.
792
- size_t best_len_left = 0;
793
- // The match length of the leftmost node of the right subtree of the new
794
- // root, updated as we traverse and reroot the tree of the hash bucket.
795
- size_t best_len_right = 0;
796
- if (reroot_tree) {
797
- buckets_[key] = static_cast<uint32_t>(cur_ix);
798
- }
799
- for (size_t depth_remaining = kMaxTreeSearchDepth; ; --depth_remaining) {
800
- const size_t backward = cur_ix - prev_ix;
801
- const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
802
- if (backward == 0 || backward > max_backward || depth_remaining == 0) {
803
- if (reroot_tree) {
804
- forest_[node_left] = invalid_pos_;
805
- forest_[node_right] = invalid_pos_;
806
- }
807
- break;
322
+ /* Stores the hash of the next 4 bytes and in a single tree-traversal, the
323
+ hash bucket's binary tree is searched for matches and is re-rooted at the
324
+ current position.
325
+
326
+ If less than MAX_TREE_COMP_LENGTH data is available, the hash bucket of the
327
+ current position is searched for matches, but the state of the hash table
328
+ is not changed, since we can not know the final sorting order of the
329
+ current (incomplete) sequence.
330
+
331
+ This function must be called with increasing cur_ix positions. */
332
+ static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
333
+ HashToBinaryTree* self, const uint8_t* const BROTLI_RESTRICT data,
334
+ const size_t cur_ix, const size_t ring_buffer_mask, const size_t max_length,
335
+ const size_t max_backward, size_t* const BROTLI_RESTRICT best_len,
336
+ BackwardMatch* BROTLI_RESTRICT matches) {
337
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
338
+ const size_t max_comp_len =
339
+ BROTLI_MIN(size_t, max_length, MAX_TREE_COMP_LENGTH);
340
+ const BROTLI_BOOL should_reroot_tree =
341
+ TO_BROTLI_BOOL(max_length >= MAX_TREE_COMP_LENGTH);
342
+ const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
343
+ size_t prev_ix = self->buckets_[key];
344
+ /* The forest index of the rightmost node of the left subtree of the new
345
+ root, updated as we traverse and reroot the tree of the hash bucket. */
346
+ size_t node_left = FN(LeftChildIndex)(self, cur_ix);
347
+ /* The forest index of the leftmost node of the right subtree of the new
348
+ root, updated as we traverse and reroot the tree of the hash bucket. */
349
+ size_t node_right = FN(RightChildIndex)(self, cur_ix);
350
+ /* The match length of the rightmost node of the left subtree of the new
351
+ root, updated as we traverse and reroot the tree of the hash bucket. */
352
+ size_t best_len_left = 0;
353
+ /* The match length of the leftmost node of the right subtree of the new
354
+ root, updated as we traverse and reroot the tree of the hash bucket. */
355
+ size_t best_len_right = 0;
356
+ size_t depth_remaining;
357
+ if (should_reroot_tree) {
358
+ self->buckets_[key] = (uint32_t)cur_ix;
359
+ }
360
+ for (depth_remaining = MAX_TREE_SEARCH_DEPTH; ; --depth_remaining) {
361
+ const size_t backward = cur_ix - prev_ix;
362
+ const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
363
+ if (backward == 0 || backward > max_backward || depth_remaining == 0) {
364
+ if (should_reroot_tree) {
365
+ self->forest_[node_left] = self->invalid_pos_;
366
+ self->forest_[node_right] = self->invalid_pos_;
808
367
  }
809
- const size_t cur_len = std::min(best_len_left, best_len_right);
810
- const size_t len = cur_len +
368
+ break;
369
+ }
370
+ {
371
+ const size_t cur_len = BROTLI_MIN(size_t, best_len_left, best_len_right);
372
+ size_t len;
373
+ assert(cur_len <= MAX_TREE_COMP_LENGTH);
374
+ len = cur_len +
811
375
  FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],
812
376
  &data[prev_ix_masked + cur_len],
813
377
  max_length - cur_len);
814
- if (len > *best_len) {
378
+ assert(0 == memcmp(&data[cur_ix_masked], &data[prev_ix_masked], len));
379
+ if (matches && len > *best_len) {
815
380
  *best_len = len;
816
- if (matches) {
817
- *matches++ = BackwardMatch(backward, len);
818
- }
819
- if (len >= max_comp_len) {
820
- if (reroot_tree) {
821
- forest_[node_left] = forest_[LeftChildIndex(prev_ix)];
822
- forest_[node_right] = forest_[RightChildIndex(prev_ix)];
823
- }
824
- break;
381
+ InitBackwardMatch(matches++, backward, len);
382
+ }
383
+ if (len >= max_comp_len) {
384
+ if (should_reroot_tree) {
385
+ self->forest_[node_left] =
386
+ self->forest_[FN(LeftChildIndex)(self, prev_ix)];
387
+ self->forest_[node_right] =
388
+ self->forest_[FN(RightChildIndex)(self, prev_ix)];
825
389
  }
390
+ break;
826
391
  }
827
392
  if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {
828
393
  best_len_left = len;
829
- if (reroot_tree) {
830
- forest_[node_left] = static_cast<uint32_t>(prev_ix);
394
+ if (should_reroot_tree) {
395
+ self->forest_[node_left] = (uint32_t)prev_ix;
831
396
  }
832
- node_left = RightChildIndex(prev_ix);
833
- prev_ix = forest_[node_left];
397
+ node_left = FN(RightChildIndex)(self, prev_ix);
398
+ prev_ix = self->forest_[node_left];
834
399
  } else {
835
400
  best_len_right = len;
836
- if (reroot_tree) {
837
- forest_[node_right] = static_cast<uint32_t>(prev_ix);
401
+ if (should_reroot_tree) {
402
+ self->forest_[node_right] = (uint32_t)prev_ix;
838
403
  }
839
- node_right = LeftChildIndex(prev_ix);
840
- prev_ix = forest_[node_right];
404
+ node_right = FN(LeftChildIndex)(self, prev_ix);
405
+ prev_ix = self->forest_[node_right];
841
406
  }
842
407
  }
843
- return matches;
844
- }
845
-
846
- inline size_t LeftChildIndex(const size_t pos) {
847
- return 2 * (pos & window_mask_);
848
408
  }
409
+ return matches;
410
+ }
849
411
 
850
- inline size_t RightChildIndex(const size_t pos) {
851
- return 2 * (pos & window_mask_) + 1;
412
+ /* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
413
+ length of max_length and stores the position cur_ix in the hash table.
414
+
415
+ Sets *num_matches to the number of matches found, and stores the found
416
+ matches in matches[0] to matches[*num_matches - 1]. The matches will be
417
+ sorted by strictly increasing length and (non-strictly) increasing
418
+ distance. */
419
+ static BROTLI_INLINE size_t FN(FindAllMatches)(HashToBinaryTree* self,
420
+ const uint8_t* data, const size_t ring_buffer_mask, const size_t cur_ix,
421
+ const size_t max_length, const size_t max_backward,
422
+ const BrotliEncoderParams* params, BackwardMatch* matches) {
423
+ BackwardMatch* const orig_matches = matches;
424
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
425
+ size_t best_len = 1;
426
+ const size_t short_match_max_backward =
427
+ params->quality != HQ_ZOPFLIFICATION_QUALITY ? 16 : 64;
428
+ size_t stop = cur_ix - short_match_max_backward;
429
+ uint32_t dict_matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
430
+ size_t i;
431
+ if (cur_ix < short_match_max_backward) { stop = 0; }
432
+ for (i = cur_ix - 1; i > stop && best_len <= 2; --i) {
433
+ size_t prev_ix = i;
434
+ const size_t backward = cur_ix - prev_ix;
435
+ if (PREDICT_FALSE(backward > max_backward)) {
436
+ break;
437
+ }
438
+ prev_ix &= ring_buffer_mask;
439
+ if (data[cur_ix_masked] != data[prev_ix] ||
440
+ data[cur_ix_masked + 1] != data[prev_ix + 1]) {
441
+ continue;
442
+ }
443
+ {
444
+ const size_t len =
445
+ FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
446
+ max_length);
447
+ if (len > best_len) {
448
+ best_len = len;
449
+ InitBackwardMatch(matches++, backward, len);
450
+ }
451
+ }
852
452
  }
853
-
854
- static uint32_t HashBytes(const uint8_t *data) {
855
- uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
856
- // The higher bits contain more mixture from the multiplication,
857
- // so we take our results from there.
858
- return h >> (32 - kBucketBits);
453
+ if (best_len < max_length) {
454
+ matches = FN(StoreAndFindMatches)(self, data, cur_ix, ring_buffer_mask,
455
+ max_length, max_backward, &best_len, matches);
456
+ }
457
+ for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) {
458
+ dict_matches[i] = kInvalidMatch;
459
+ }
460
+ {
461
+ size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1);
462
+ if (BrotliFindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen,
463
+ max_length, &dict_matches[0])) {
464
+ size_t maxlen = BROTLI_MIN(
465
+ size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length);
466
+ size_t l;
467
+ for (l = minlen; l <= maxlen; ++l) {
468
+ uint32_t dict_id = dict_matches[l];
469
+ if (dict_id < kInvalidMatch) {
470
+ InitDictionaryBackwardMatch(matches++,
471
+ max_backward + (dict_id >> 5) + 1, l, dict_id & 31);
472
+ }
473
+ }
474
+ }
859
475
  }
476
+ return (size_t)(matches - orig_matches);
477
+ }
860
478
 
861
- static const int kBucketBits = 17;
862
- static const size_t kBucketSize = 1 << kBucketBits;
863
-
864
- // The window size minus 1
865
- size_t window_mask_;
866
-
867
- // Hash table that maps the 4-byte hashes of the sequence to the last
868
- // position where this hash was found, which is the root of the binary
869
- // tree of sequences that share this hash bucket.
870
- uint32_t buckets_[kBucketSize];
871
-
872
- // The union of the binary trees of each hash bucket. The root of the tree
873
- // corresponding to a hash is a sequence starting at buckets_[hash] and
874
- // the left and right children of a sequence starting at pos are
875
- // forest_[2 * pos] and forest_[2 * pos + 1].
876
- uint32_t* forest_;
877
-
878
- // A position used to mark a non-existent sequence, i.e. a tree is empty if
879
- // its root is at invalid_pos_ and a node is a leaf if both its children
880
- // are at invalid_pos_.
881
- uint32_t invalid_pos_;
882
-
883
- bool need_init_;
884
- };
479
+ /* Stores the hash of the next 4 bytes and re-roots the binary tree at the
480
+ current sequence, without returning any matches.
481
+ REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */
482
+ static BROTLI_INLINE void FN(Store)(HashToBinaryTree* self, const uint8_t *data,
483
+ const size_t mask, const size_t ix) {
484
+ /* Maximum distance is window size - 16, see section 9.1. of the spec. */
485
+ const size_t max_backward = self->window_mask_ - 15;
486
+ FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH,
487
+ max_backward, NULL, NULL);
488
+ }
885
489
 
-struct Hashers {
-  // For kBucketSweep == 1, enabling the dictionary lookup makes compression
-  // a little faster (0.5% - 1%) and it compresses 0.15% better on small text
-  // and html inputs.
-  typedef HashLongestMatchQuickly<16, 1, true> H2;
-  typedef HashLongestMatchQuickly<16, 2, false> H3;
-  typedef HashLongestMatchQuickly<17, 4, true> H4;
-  typedef HashLongestMatch<14, 4, 4> H5;
-  typedef HashLongestMatch<14, 5, 4> H6;
-  typedef HashLongestMatch<15, 6, 10> H7;
-  typedef HashLongestMatch<15, 7, 10> H8;
-  typedef HashLongestMatch<15, 8, 16> H9;
-  typedef HashToBinaryTree H10;
-
-  Hashers(void) : hash_h2(0), hash_h3(0), hash_h4(0), hash_h5(0),
-      hash_h6(0), hash_h7(0), hash_h8(0), hash_h9(0), hash_h10(0) {}
-
-  ~Hashers(void) {
-    delete hash_h2;
-    delete hash_h3;
-    delete hash_h4;
-    delete hash_h5;
-    delete hash_h6;
-    delete hash_h7;
-    delete hash_h8;
-    delete hash_h9;
-    delete hash_h10;
+static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* self,
+    const uint8_t *data, const size_t mask, const size_t ix_start,
+    const size_t ix_end) {
+  size_t i = ix_start + 63 <= ix_end ? ix_end - 63 : ix_start;
+  for (; i < ix_end; ++i) {
+    FN(Store)(self, data, mask, i);
   }
+}
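Note the clamp at the top of `StoreRange`: a range longer than 63 positions only gets its last 63 inserted into the tree, which bounds the cost of skipping over long stretches of input. A throwaway check of the index math, with made-up bounds:

    #include <stddef.h>
    #include <stdio.h>

    /* Toy illustration of StoreRange's clamp: for a long range only the
       last 63 positions are stored.  The bounds are invented. */
    int main(void) {
      size_t ix_start = 100, ix_end = 1000;
      size_t i = ix_start + 63 <= ix_end ? ix_end - 63 : ix_start;
      printf("first stored position: %zu (of range [%zu, %zu))\n",
             i, ix_start, ix_end);  /* prints 937 */
      return 0;
    }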
 
-  void Init(int type) {
-    switch (type) {
-      case 2: hash_h2 = new H2; break;
-      case 3: hash_h3 = new H3; break;
-      case 4: hash_h4 = new H4; break;
-      case 5: hash_h5 = new H5; break;
-      case 6: hash_h6 = new H6; break;
-      case 7: hash_h7 = new H7; break;
-      case 8: hash_h8 = new H8; break;
-      case 9: hash_h9 = new H9; break;
-      case 10: hash_h10 = new H10; break;
-      default: break;
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashToBinaryTree* self,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ringbuffer_mask) {
+  if (num_bytes >= FN(HashTypeLength)() - 1 &&
+      position >= MAX_TREE_COMP_LENGTH) {
+    /* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher.
+       These could not be calculated before, since they require knowledge
+       of both the previous and the current block. */
+    const size_t i_start = position - MAX_TREE_COMP_LENGTH + 1;
+    const size_t i_end = BROTLI_MIN(size_t, position, i_start + num_bytes);
+    size_t i;
+    for (i = i_start; i < i_end; ++i) {
+      /* Maximum distance is window size - 16, see section 9.1. of the spec.
+         Furthermore, we have to make sure that we don't look further back
+         from the start of the next block than the window size, otherwise we
+         could access already overwritten areas of the ringbuffer. */
+      const size_t max_backward =
+          self->window_mask_ - BROTLI_MAX(size_t, 15, position - i);
+      /* We know that i + MAX_TREE_COMP_LENGTH <= position + num_bytes, i.e.
+         the end of the current block, and that we have at least
+         MAX_TREE_COMP_LENGTH tail in the ringbuffer. */
+      FN(StoreAndFindMatches)(self, ringbuffer, i, ringbuffer_mask,
+          MAX_TREE_COMP_LENGTH, max_backward, NULL, NULL);
     }
   }
+}
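The subtle part of `StitchToPreviousBlock` is the shrinking `max_backward`: a position that lies `position - i` bytes behind the block boundary may look back that much less than the usual window-size-minus-16, so the tree never references already-overwritten ring-buffer bytes. A quick numeric check with invented values:

    #include <stddef.h>
    #include <stdio.h>

    #define MAX_OF(a, b) ((a) > (b) ? (a) : (b))

    /* Invented numbers, only to show the clamp: a stored position 96 bytes
       behind the block boundary may look back 96 bytes less than the usual
       window-size-minus-16 maximum. */
    int main(void) {
      size_t window_mask = ((size_t)1 << 22) - 1;  /* lgwin = 22 */
      size_t position = 4096;                      /* current block start  */
      size_t i = 4000;                             /* position stitched in */
      size_t max_backward = window_mask - MAX_OF((size_t)15, position - i);
      printf("max_backward = %zu\n", max_backward);  /* (1 << 22) - 1 - 96 */
      return 0;
    }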
 
-  template<typename Hasher>
-  void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {
-    hasher->Init();
-    for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) {
-      hasher->Store(&dict[i], static_cast<uint32_t>(i));
-    }
-  }
+#undef BUCKET_SIZE
+#undef BUCKET_BITS
+
+#undef HASHER
+
+/* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression
+   a little faster (0.5% - 1%) and it compresses 0.15% better on small text
+   and html inputs. */
+
+#define HASHER() H2
+#define BUCKET_BITS 16
+#define BUCKET_SWEEP 1
+#define USE_DICTIONARY 1
+#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
+#undef BUCKET_SWEEP
+#undef USE_DICTIONARY
+#undef HASHER
+
+#define HASHER() H3
+#define BUCKET_SWEEP 2
+#define USE_DICTIONARY 0
+#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
+#undef USE_DICTIONARY
+#undef BUCKET_SWEEP
+#undef BUCKET_BITS
+#undef HASHER
+
+#define HASHER() H4
+#define BUCKET_BITS 17
+#define BUCKET_SWEEP 4
+#define USE_DICTIONARY 1
+#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
+#undef USE_DICTIONARY
+#undef BUCKET_SWEEP
+#undef BUCKET_BITS
+#undef HASHER
+
+#define HASHER() H5
+#define BUCKET_BITS 14
+#define BLOCK_BITS 4
+#define NUM_LAST_DISTANCES_TO_CHECK 4
+#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
+#undef BLOCK_BITS
+#undef HASHER
+
+#define HASHER() H6
+#define BLOCK_BITS 5
+#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
+#undef NUM_LAST_DISTANCES_TO_CHECK
+#undef BLOCK_BITS
+#undef BUCKET_BITS
+#undef HASHER
+
+#define HASHER() H7
+#define BUCKET_BITS 15
+#define BLOCK_BITS 6
+#define NUM_LAST_DISTANCES_TO_CHECK 10
+#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
+#undef BLOCK_BITS
+#undef HASHER
+
+#define HASHER() H8
+#define BLOCK_BITS 7
+#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
+#undef NUM_LAST_DISTANCES_TO_CHECK
+#undef BLOCK_BITS
+#undef HASHER
+
+#define HASHER() H9
+#define BLOCK_BITS 8
+#define NUM_LAST_DISTANCES_TO_CHECK 16
+#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
+#undef NUM_LAST_DISTANCES_TO_CHECK
+#undef BLOCK_BITS
+#undef BUCKET_BITS
+#undef HASHER
+
+#define BUCKET_BITS 15
+
+#define NUM_LAST_DISTANCES_TO_CHECK 4
+#define NUM_BANKS 1
+#define BANK_BITS 16
+#define HASHER() H40
+#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
+#undef HASHER
+#undef NUM_LAST_DISTANCES_TO_CHECK
+
+#define NUM_LAST_DISTANCES_TO_CHECK 10
+#define HASHER() H41
+#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
+#undef HASHER
+#undef NUM_LAST_DISTANCES_TO_CHECK
+#undef NUM_BANKS
+#undef BANK_BITS
+
+#define NUM_LAST_DISTANCES_TO_CHECK 16
+#define NUM_BANKS 512
+#define BANK_BITS 9
+#define HASHER() H42
+#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
+#undef HASHER
+#undef NUM_LAST_DISTANCES_TO_CHECK
+#undef NUM_BANKS
+#undef BANK_BITS
+
+#undef BUCKET_BITS
+
+#undef FN
+#undef CAT
+#undef EXPAND_CAT
+
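This is the heart of the C++-to-C rewrite in this file: the old class templates (`HashLongestMatchQuickly<16, 1, true>` and friends) become "templates by preprocessor". Each `*_inc.h` body is included once per hasher with `HASHER()`, `BUCKET_BITS`, `BUCKET_SWEEP`, etc. set to that instance's parameters, and the `FN`/`CAT`/`EXPAND_CAT` macros (undefined just above) mangle the instance name into each generated function. A self-contained miniature of the pattern, using a macro body in place of a separate include file:

    #include <stdio.h>

    /* Same name-mangling trick as the header: HASHER() expands before the
       token paste, so FN(BucketCount) becomes BucketCountH2, BucketCountH4. */
    #define CAT(a, b) a ## b
    #define EXPAND_CAT(a, b) CAT(a, b)
    #define FN(name) EXPAND_CAT(name, HASHER())

    /* Stand-in for an _inc.h body; instantiated once per parameter set. */
    #define DEFINE_BUCKET_COUNT_FN \
      static unsigned FN(BucketCount)(void) { return 1u << BUCKET_BITS; }

    #define HASHER() H2
    #define BUCKET_BITS 16
    DEFINE_BUCKET_COUNT_FN   /* emits BucketCountH2 */
    #undef BUCKET_BITS
    #undef HASHER

    #define HASHER() H4
    #define BUCKET_BITS 17
    DEFINE_BUCKET_COUNT_FN   /* emits BucketCountH4 */
    #undef BUCKET_BITS
    #undef HASHER

    int main(void) {
      printf("H2: %u buckets, H4: %u buckets\n",
             BucketCountH2(), BucketCountH4());
      return 0;
    }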
+#define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(7) H(8) H(9) \
+    H(40) H(41) H(42)
+#define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
+
+typedef struct Hashers {
+#define _MEMBER(N) H ## N* h ## N;
+  FOR_ALL_HASHERS(_MEMBER)
+#undef _MEMBER
+} Hashers;
+
+static BROTLI_INLINE void InitHashers(Hashers* self) {
+#define _INIT(N) self->h ## N = 0;
+  FOR_ALL_HASHERS(_INIT)
+#undef _INIT
+}
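`FOR_ALL_HASHERS` is a classic X-macro: every call site passes a one-argument macro that the list applies to each hasher id, so the struct members and the init/reset/setup/cleanup code defined below can never drift out of sync. A self-contained demo of the idiom with illustrative names:

    #include <stdio.h>

    /* One list macro drives both the struct definition and the init code,
       mirroring FOR_ALL_HASHERS above.  Names here are invented. */
    #define FOR_ALL_WIDGETS(X) X(foo) X(bar) X(baz)

    typedef struct Widgets {
    #define _MEMBER(NAME) int NAME;
      FOR_ALL_WIDGETS(_MEMBER)
    #undef _MEMBER
    } Widgets;

    static void InitWidgets(Widgets* w) {
    #define _INIT(NAME) w->NAME = 0;
      FOR_ALL_WIDGETS(_INIT)
    #undef _INIT
    }

    int main(void) {
      Widgets w;
      InitWidgets(&w);
      printf("%d %d %d\n", w.foo, w.bar, w.baz);
      return 0;
    }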
 
-  // Custom LZ77 window.
-  void PrependCustomDictionary(
-      int type, int lgwin, const size_t size, const uint8_t* dict) {
-    switch (type) {
-      case 2: WarmupHash(size, dict, hash_h2); break;
-      case 3: WarmupHash(size, dict, hash_h3); break;
-      case 4: WarmupHash(size, dict, hash_h4); break;
-      case 5: WarmupHash(size, dict, hash_h5); break;
-      case 6: WarmupHash(size, dict, hash_h6); break;
-      case 7: WarmupHash(size, dict, hash_h7); break;
-      case 8: WarmupHash(size, dict, hash_h8); break;
-      case 9: WarmupHash(size, dict, hash_h9); break;
-      case 10:
-        hash_h10->Init(lgwin, 0, size, false);
-        for (size_t i = 0; i + kMaxTreeCompLength - 1 < size; ++i) {
-          hash_h10->Store(dict, std::numeric_limits<size_t>::max(), i);
-        }
-        break;
-      default: break;
-    }
+static BROTLI_INLINE void DestroyHashers(MemoryManager* m, Hashers* self) {
+  if (self->h10) CleanupH10(m, self->h10);
+#define _CLEANUP(N) BROTLI_FREE(m, self->h ## N)
+  FOR_ALL_HASHERS(_CLEANUP)
+#undef _CLEANUP
+}
+
+static BROTLI_INLINE void HashersReset(Hashers* self, int type) {
+  switch (type) {
+#define _RESET(N) case N: ResetH ## N(self->h ## N); break;
+    FOR_ALL_HASHERS(_RESET)
+#undef _RESET
+    default: break;
   }
+}

+static BROTLI_INLINE void HashersSetup(
+    MemoryManager* m, Hashers* self, int type) {
+  switch (type) {
+#define _SETUP(N) case N: self->h ## N = BROTLI_ALLOC(m, H ## N, 1); break;
+    FOR_ALL_HASHERS(_SETUP)
+#undef _SETUP
+    default: break;
+  }
+  if (BROTLI_IS_OOM(m)) return;
+  if (type == 10) InitializeH10(self->h10);
+  HashersReset(self, type);
+}
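Together these three functions replace the old constructor/destructor pair with an explicit lifecycle through the `MemoryManager`. A hypothetical caller (not code from this gem; it assumes the declarations above are in scope) would drive it roughly like this:

    /* Hypothetical usage sketch, not library code: hashers are lazily
       allocated per type and released via the MemoryManager instead of
       C++ new/delete. */
    void ExampleLifecycle(MemoryManager* m) {
      Hashers hashers;
      int hasher_type = 5;          /* e.g. a quality-dependent choice */
      InitHashers(&hashers);        /* all pointers start out NULL     */
      HashersSetup(m, &hashers, hasher_type);
      if (BROTLI_IS_OOM(m)) return;
      /* ... feed data through the selected hasher ... */
      DestroyHashers(m, &hashers);
    }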
+
+#define _WARMUP_HASH(N) \
+static BROTLI_INLINE void WarmupHashH ## N(MemoryManager* m, \
+    const BrotliEncoderParams* params, const size_t size, const uint8_t* dict, \
+    H ## N* hasher) { \
+  size_t overlap = (StoreLookaheadH ## N()) - 1; \
+  size_t i; \
+  InitH ## N(m, hasher, dict, params, 0, size, BROTLI_FALSE); \
+  if (BROTLI_IS_OOM(m)) return; \
+  for (i = 0; i + overlap < size; i++) { \
+    StoreH ## N(hasher, dict, ~(size_t)0, i); \
+  } \
+}
+FOR_ALL_HASHERS(_WARMUP_HASH)
+#undef _WARMUP_HASH
+
+/* Custom LZ77 window. */
+static BROTLI_INLINE void HashersPrependCustomDictionary(
+    MemoryManager* m, Hashers* self, const BrotliEncoderParams* params,
+    const size_t size, const uint8_t* dict) {
+  int hasher_type = ChooseHasher(params);
+  switch (hasher_type) {
+#define _PREPEND(N) \
+    case N: WarmupHashH ## N(m, params, size, dict, self->h ## N); break;
+    FOR_ALL_HASHERS(_PREPEND)
+#undef _PREPEND
+    default: break;
+  }
+  if (BROTLI_IS_OOM(m)) return;
+}
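One detail that is easy to misread in `_WARMUP_HASH`: the custom dictionary is a plain linear buffer, so the warmup loop passes `~(size_t)0` as the ring-buffer mask, and masking with all ones is the identity. A one-line sanity check (ours, for illustration):

    #include <assert.h>
    #include <stddef.h>

    int main(void) {
      /* With an all-ones mask, pos & mask == pos: the custom dictionary is
         addressed linearly, unlike the encoder's ring buffer. */
      size_t pos = 12345;
      assert((pos & ~(size_t)0) == pos);
      return 0;
    }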
 
-  H2* hash_h2;
-  H3* hash_h3;
-  H4* hash_h4;
-  H5* hash_h5;
-  H6* hash_h6;
-  H7* hash_h7;
-  H8* hash_h8;
-  H9* hash_h9;
-  H10* hash_h10;
-};

-}  // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif

-#endif  // BROTLI_ENC_HASH_H_
+#endif  /* BROTLI_ENC_HASH_H_ */