brotli 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/ext/brotli/brotli.cc +114 -24
  3. data/ext/brotli/brotli.h +0 -1
  4. data/ext/brotli/extconf.rb +30 -23
  5. data/lib/brotli/version.rb +1 -1
  6. data/vendor/brotli/LICENSE +1 -1
  7. data/vendor/brotli/dec/Makefile +1 -1
  8. data/vendor/brotli/dec/bit_reader.c +3 -3
  9. data/vendor/brotli/dec/bit_reader.h +25 -27
  10. data/vendor/brotli/dec/context.h +4 -4
  11. data/vendor/brotli/dec/decode.c +410 -486
  12. data/vendor/brotli/dec/decode.h +101 -105
  13. data/vendor/brotli/dec/dictionary.c +1 -1
  14. data/vendor/brotli/dec/dictionary.h +7 -8
  15. data/vendor/brotli/dec/huffman.c +103 -105
  16. data/vendor/brotli/dec/huffman.h +18 -18
  17. data/vendor/brotli/dec/port.h +52 -40
  18. data/vendor/brotli/dec/prefix.h +2 -0
  19. data/vendor/brotli/dec/state.c +13 -19
  20. data/vendor/brotli/dec/state.h +25 -39
  21. data/vendor/brotli/dec/transform.h +38 -44
  22. data/vendor/brotli/dec/types.h +2 -2
  23. data/vendor/brotli/enc/Makefile +1 -1
  24. data/vendor/brotli/enc/backward_references.cc +455 -359
  25. data/vendor/brotli/enc/backward_references.h +79 -3
  26. data/vendor/brotli/enc/bit_cost.h +54 -32
  27. data/vendor/brotli/enc/block_splitter.cc +285 -193
  28. data/vendor/brotli/enc/block_splitter.h +4 -12
  29. data/vendor/brotli/enc/brotli_bit_stream.cc +623 -324
  30. data/vendor/brotli/enc/brotli_bit_stream.h +76 -37
  31. data/vendor/brotli/enc/cluster.h +161 -120
  32. data/vendor/brotli/enc/command.h +60 -37
  33. data/vendor/brotli/enc/compress_fragment.cc +701 -0
  34. data/vendor/brotli/enc/compress_fragment.h +47 -0
  35. data/vendor/brotli/enc/compress_fragment_two_pass.cc +524 -0
  36. data/vendor/brotli/enc/compress_fragment_two_pass.h +40 -0
  37. data/vendor/brotli/enc/compressor.h +15 -0
  38. data/vendor/brotli/enc/context.h +1 -1
  39. data/vendor/brotli/enc/dictionary.h +2 -2
  40. data/vendor/brotli/enc/encode.cc +819 -286
  41. data/vendor/brotli/enc/encode.h +38 -15
  42. data/vendor/brotli/enc/encode_parallel.cc +40 -42
  43. data/vendor/brotli/enc/entropy_encode.cc +144 -147
  44. data/vendor/brotli/enc/entropy_encode.h +32 -8
  45. data/vendor/brotli/enc/entropy_encode_static.h +572 -0
  46. data/vendor/brotli/enc/fast_log.h +7 -40
  47. data/vendor/brotli/enc/find_match_length.h +9 -9
  48. data/vendor/brotli/enc/hash.h +462 -154
  49. data/vendor/brotli/enc/histogram.cc +6 -6
  50. data/vendor/brotli/enc/histogram.h +13 -13
  51. data/vendor/brotli/enc/literal_cost.cc +45 -45
  52. data/vendor/brotli/enc/metablock.cc +92 -89
  53. data/vendor/brotli/enc/metablock.h +12 -12
  54. data/vendor/brotli/enc/port.h +7 -16
  55. data/vendor/brotli/enc/prefix.h +23 -22
  56. data/vendor/brotli/enc/ringbuffer.h +75 -29
  57. data/vendor/brotli/enc/static_dict.cc +56 -48
  58. data/vendor/brotli/enc/static_dict.h +5 -5
  59. data/vendor/brotli/enc/streams.cc +1 -1
  60. data/vendor/brotli/enc/streams.h +5 -5
  61. data/vendor/brotli/enc/transform.h +40 -35
  62. data/vendor/brotli/enc/types.h +2 -0
  63. data/vendor/brotli/enc/utf8_util.cc +3 -2
  64. data/vendor/brotli/enc/write_bits.h +6 -6
  65. metadata +9 -5
  66. data/vendor/brotli/dec/streams.c +0 -102
  67. data/vendor/brotli/dec/streams.h +0 -95
@@ -10,12 +10,10 @@
10
10
  #ifndef BROTLI_ENC_HASH_H_
11
11
  #define BROTLI_ENC_HASH_H_
12
12
 
13
- #include <string.h>
14
13
  #include <sys/types.h>
15
14
  #include <algorithm>
16
- #include <cstdlib>
17
- #include <memory>
18
- #include <string>
15
+ #include <cstring>
16
+ #include <limits>
19
17
 
20
18
  #include "./dictionary_hash.h"
21
19
  #include "./fast_log.h"
@@ -28,15 +26,20 @@
28
26
 
29
27
  namespace brotli {
30
28
 
31
- static const int kDistanceCacheIndex[] = {
29
+ static const size_t kMaxTreeSearchDepth = 64;
30
+ static const size_t kMaxTreeCompLength = 128;
31
+
32
+ static const uint32_t kDistanceCacheIndex[] = {
32
33
  0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
33
34
  };
34
35
  static const int kDistanceCacheOffset[] = {
35
36
  0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
36
37
  };
37
38
 
38
- static const int kCutoffTransformsCount = 10;
39
- static const int kCutoffTransforms[] = {0, 12, 27, 23, 42, 63, 56, 48, 59, 64};
39
+ static const uint32_t kCutoffTransformsCount = 10;
40
+ static const uint8_t kCutoffTransforms[] = {
41
+ 0, 12, 27, 23, 42, 63, 56, 48, 59, 64
42
+ };
40
43
 
41
44
  // kHashMul32 multiplier has these properties:
42
45
  // * The multiplier must be odd. Otherwise we may lose the highest bit.
@@ -68,41 +71,47 @@ inline uint32_t Hash(const uint8_t *data) {
68
71
  // This function is used to sometimes discard a longer backward reference
69
72
  // when it is not much longer and the bit cost for encoding it is more
70
73
  // than the saved literals.
71
- inline double BackwardReferenceScore(int copy_length,
72
- int backward_reference_offset) {
73
- return 5.4 * copy_length - 1.20 * Log2Floor(backward_reference_offset);
74
+ //
75
+ // backward_reference_offset MUST be positive.
76
+ inline double BackwardReferenceScore(size_t copy_length,
77
+ size_t backward_reference_offset) {
78
+ return 5.4 * static_cast<double>(copy_length) -
79
+ 1.20 * Log2FloorNonZero(backward_reference_offset);
74
80
  }
75
81
 
76
- inline double BackwardReferenceScoreUsingLastDistance(int copy_length,
77
- int distance_short_code) {
82
+ inline double BackwardReferenceScoreUsingLastDistance(size_t copy_length,
83
+ size_t distance_short_code) {
78
84
  static const double kDistanceShortCodeBitCost[16] = {
79
85
  -0.6, 0.95, 1.17, 1.27,
80
86
  0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
81
87
  1.05, 1.05, 1.15, 1.15, 1.25, 1.25
82
88
  };
83
- return 5.4 * copy_length - kDistanceShortCodeBitCost[distance_short_code];
89
+ return 5.4 * static_cast<double>(copy_length) -
90
+ kDistanceShortCodeBitCost[distance_short_code];
84
91
  }
85
92
 
86
93
  struct BackwardMatch {
87
- BackwardMatch() : distance(0), length_and_code(0) {}
94
+ BackwardMatch(void) : distance(0), length_and_code(0) {}
88
95
 
89
- BackwardMatch(int dist, int len)
90
- : distance(dist), length_and_code((len << 5)) {}
96
+ BackwardMatch(size_t dist, size_t len)
97
+ : distance(static_cast<uint32_t>(dist))
98
+ , length_and_code(static_cast<uint32_t>(len << 5)) {}
91
99
 
92
- BackwardMatch(int dist, int len, int len_code)
93
- : distance(dist),
94
- length_and_code((len << 5) | (len == len_code ? 0 : len_code)) {}
100
+ BackwardMatch(size_t dist, size_t len, size_t len_code)
101
+ : distance(static_cast<uint32_t>(dist))
102
+ , length_and_code(static_cast<uint32_t>(
103
+ (len << 5) | (len == len_code ? 0 : len_code))) {}
95
104
 
96
- int length() const {
105
+ size_t length(void) const {
97
106
  return length_and_code >> 5;
98
107
  }
99
- int length_code() const {
100
- int code = length_and_code & 31;
108
+ size_t length_code(void) const {
109
+ size_t code = length_and_code & 31;
101
110
  return code ? code : length();
102
111
  }
103
112
 
104
- int distance;
105
- int length_and_code;
113
+ uint32_t distance;
114
+ uint32_t length_and_code;
106
115
  };
107
116
 
108
117
  // A (forgetful) hash table to the data seen by the compressor, to
@@ -113,18 +122,31 @@ struct BackwardMatch {
113
122
  template <int kBucketBits, int kBucketSweep, bool kUseDictionary>
114
123
  class HashLongestMatchQuickly {
115
124
  public:
116
- HashLongestMatchQuickly() {
125
+ HashLongestMatchQuickly(void) {
117
126
  Reset();
118
127
  }
119
- void Reset() {
120
- // It is not strictly necessary to fill this buffer here, but
121
- // not filling will make the results of the compression stochastic
122
- // (but correct). This is because random data would cause the
123
- // system to find accidentally good backward references here and there.
124
- memset(&buckets_[0], 0, sizeof(buckets_));
128
+ void Reset(void) {
129
+ need_init_ = true;
125
130
  num_dict_lookups_ = 0;
126
131
  num_dict_matches_ = 0;
127
132
  }
133
+ void Init(void) {
134
+ if (need_init_) {
135
+ // It is not strictly necessary to fill this buffer here, but
136
+ // not filling will make the results of the compression stochastic
137
+ // (but correct). This is because random data would cause the
138
+ // system to find accidentally good backward references here and there.
139
+ memset(&buckets_[0], 0, sizeof(buckets_));
140
+ need_init_ = false;
141
+ }
142
+ }
143
+ void InitForData(const uint8_t* data, size_t num) {
144
+ for (size_t i = 0; i < num; ++i) {
145
+ const uint32_t key = HashBytes(&data[i]);
146
+ memset(&buckets_[key], 0, kBucketSweep * sizeof(buckets_[0]));
147
+ need_init_ = false;
148
+ }
149
+ }
128
150
  // Look at 4 bytes at data.
129
151
  // Compute a hash from these, and store the value somewhere within
130
152
  // [ix .. ix+3].
@@ -136,7 +158,8 @@ class HashLongestMatchQuickly {
136
158
  }
137
159
 
138
160
  // Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
139
- // up to the length of max_length.
161
+ // up to the length of max_length and stores the position cur_ix in the
162
+ // hash table.
140
163
  //
141
164
  // Does not look for matches longer than max_length.
142
165
  // Does not look for matches further away than max_backward.
@@ -146,27 +169,28 @@ class HashLongestMatchQuickly {
146
169
  inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer,
147
170
  const size_t ring_buffer_mask,
148
171
  const int* __restrict distance_cache,
149
- const uint32_t cur_ix,
150
- const int max_length,
151
- const uint32_t max_backward,
152
- int * __restrict best_len_out,
153
- int * __restrict best_len_code_out,
154
- int * __restrict best_distance_out,
172
+ const size_t cur_ix,
173
+ const size_t max_length,
174
+ const size_t max_backward,
175
+ size_t * __restrict best_len_out,
176
+ size_t * __restrict best_len_code_out,
177
+ size_t * __restrict best_distance_out,
155
178
  double* __restrict best_score_out) {
156
- const int best_len_in = *best_len_out;
179
+ const size_t best_len_in = *best_len_out;
157
180
  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
181
+ const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]);
158
182
  int compare_char = ring_buffer[cur_ix_masked + best_len_in];
159
183
  double best_score = *best_score_out;
160
- int best_len = best_len_in;
161
- int cached_backward = distance_cache[0];
162
- uint32_t prev_ix = cur_ix - cached_backward;
184
+ size_t best_len = best_len_in;
185
+ size_t cached_backward = static_cast<size_t>(distance_cache[0]);
186
+ size_t prev_ix = cur_ix - cached_backward;
163
187
  bool match_found = false;
164
188
  if (prev_ix < cur_ix) {
165
189
  prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
166
190
  if (compare_char == ring_buffer[prev_ix + best_len]) {
167
- int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
168
- &ring_buffer[cur_ix_masked],
169
- max_length);
191
+ size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
192
+ &ring_buffer[cur_ix_masked],
193
+ max_length);
170
194
  if (len >= 4) {
171
195
  best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
172
196
  best_len = len;
@@ -176,6 +200,7 @@ class HashLongestMatchQuickly {
176
200
  *best_score_out = best_score;
177
201
  compare_char = ring_buffer[cur_ix_masked + best_len];
178
202
  if (kBucketSweep == 1) {
203
+ buckets_[key] = static_cast<uint32_t>(cur_ix);
179
204
  return true;
180
205
  } else {
181
206
  match_found = true;
@@ -183,11 +208,11 @@ class HashLongestMatchQuickly {
183
208
  }
184
209
  }
185
210
  }
186
- const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]);
187
211
  if (kBucketSweep == 1) {
188
212
  // Only one to look for, don't bother to prepare for a loop.
189
213
  prev_ix = buckets_[key];
190
- uint32_t backward = cur_ix - prev_ix;
214
+ buckets_[key] = static_cast<uint32_t>(cur_ix);
215
+ size_t backward = cur_ix - prev_ix;
191
216
  prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
192
217
  if (compare_char != ring_buffer[prev_ix + best_len_in]) {
193
218
  return false;
@@ -195,9 +220,9 @@ class HashLongestMatchQuickly {
195
220
  if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
196
221
  return false;
197
222
  }
198
- const int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
199
- &ring_buffer[cur_ix_masked],
200
- max_length);
223
+ const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
224
+ &ring_buffer[cur_ix_masked],
225
+ max_length);
201
226
  if (len >= 4) {
202
227
  *best_len_out = len;
203
228
  *best_len_code_out = len;
@@ -209,7 +234,7 @@ class HashLongestMatchQuickly {
209
234
  uint32_t *bucket = buckets_ + key;
210
235
  prev_ix = *bucket++;
211
236
  for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {
212
- const uint32_t backward = cur_ix - prev_ix;
237
+ const size_t backward = cur_ix - prev_ix;
213
238
  prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
214
239
  if (compare_char != ring_buffer[prev_ix + best_len]) {
215
240
  continue;
@@ -217,10 +242,9 @@ class HashLongestMatchQuickly {
217
242
  if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
218
243
  continue;
219
244
  }
220
- const int len =
221
- FindMatchLengthWithLimit(&ring_buffer[prev_ix],
222
- &ring_buffer[cur_ix_masked],
223
- max_length);
245
+ const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
246
+ &ring_buffer[cur_ix_masked],
247
+ max_length);
224
248
  if (len >= 4) {
225
249
  const double score = BackwardReferenceScore(len, backward);
226
250
  if (best_score < score) {
@@ -242,19 +266,20 @@ class HashLongestMatchQuickly {
242
266
  const uint32_t dict_key = Hash<14>(&ring_buffer[cur_ix_masked]) << 1;
243
267
  const uint16_t v = kStaticDictionaryHash[dict_key];
244
268
  if (v > 0) {
245
- const int len = v & 31;
246
- const int dist = v >> 5;
247
- const int offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
269
+ const uint32_t len = v & 31;
270
+ const uint32_t dist = v >> 5;
271
+ const size_t offset =
272
+ kBrotliDictionaryOffsetsByLength[len] + len * dist;
248
273
  if (len <= max_length) {
249
- const int matchlen =
274
+ const size_t matchlen =
250
275
  FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
251
276
  &kBrotliDictionary[offset], len);
252
- if (matchlen > len - kCutoffTransformsCount && matchlen > 0) {
253
- const int transform_id = kCutoffTransforms[len - matchlen];
254
- const int word_id =
255
- transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
277
+ if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
278
+ const size_t transform_id = kCutoffTransforms[len - matchlen];
279
+ const size_t word_id =
280
+ transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]) +
256
281
  dist;
257
- const int backward = max_backward + word_id + 1;
282
+ const size_t backward = max_backward + word_id + 1;
258
283
  const double score = BackwardReferenceScore(matchlen, backward);
259
284
  if (best_score < score) {
260
285
  ++num_dict_matches_;
@@ -264,12 +289,14 @@ class HashLongestMatchQuickly {
264
289
  *best_len_code_out = len;
265
290
  *best_distance_out = backward;
266
291
  *best_score_out = best_score;
267
- return true;
292
+ match_found = true;
268
293
  }
269
294
  }
270
295
  }
271
296
  }
272
297
  }
298
+ const uint32_t off = (cur_ix >> 3) % kBucketSweep;
299
+ buckets_[key + off] = static_cast<uint32_t>(cur_ix);
273
300
  return match_found;
274
301
  }
275
302
 
@@ -287,16 +314,17 @@ class HashLongestMatchQuickly {
287
314
  return static_cast<uint32_t>(h >> (64 - kBucketBits));
288
315
  }
289
316
 
317
+ enum { kHashMapSize = 4 << kBucketBits };
318
+
290
319
  private:
291
320
  static const uint32_t kBucketSize = 1 << kBucketBits;
292
321
  uint32_t buckets_[kBucketSize + kBucketSweep];
322
+ // True if buckets_ array needs to be initialized.
323
+ bool need_init_;
293
324
  size_t num_dict_lookups_;
294
325
  size_t num_dict_matches_;
295
326
  };
296
327
 
297
- // The maximum length for which the zopflification uses distinct distances.
298
- static const int kMaxZopfliLen = 325;
299
-
300
328
  // A (forgetful) hash table to the data seen by the compressor, to
301
329
  // help create backward references to previous data.
302
330
  //
@@ -308,16 +336,31 @@ template <int kBucketBits,
308
336
  int kNumLastDistancesToCheck>
309
337
  class HashLongestMatch {
310
338
  public:
311
- HashLongestMatch() {
339
+ HashLongestMatch(void) {
312
340
  Reset();
313
341
  }
314
342
 
315
- void Reset() {
316
- memset(&num_[0], 0, sizeof(num_));
343
+ void Reset(void) {
344
+ need_init_ = true;
317
345
  num_dict_lookups_ = 0;
318
346
  num_dict_matches_ = 0;
319
347
  }
320
348
 
349
+ void Init(void) {
350
+ if (need_init_) {
351
+ memset(&num_[0], 0, sizeof(num_));
352
+ need_init_ = false;
353
+ }
354
+ }
355
+
356
+ void InitForData(const uint8_t* data, size_t num) {
357
+ for (size_t i = 0; i < num; ++i) {
358
+ const uint32_t key = HashBytes(&data[i]);
359
+ num_[key] = 0;
360
+ need_init_ = false;
361
+ }
362
+ }
363
+
321
364
  // Look at 3 bytes at data.
322
365
  // Compute a hash from these, and store the value of ix at that position.
323
366
  inline void Store(const uint8_t *data, const uint32_t ix) {
@@ -328,7 +371,7 @@ class HashLongestMatch {
328
371
  }
329
372
 
330
373
  // Find a longest backward match of &data[cur_ix] up to the length of
331
- // max_length.
374
+ // max_length and stores the position cur_ix in the hash table.
332
375
  //
333
376
  // Does not look for matches longer than max_length.
334
377
  // Does not look for matches further away than max_backward.
@@ -339,41 +382,42 @@ class HashLongestMatch {
339
382
  bool FindLongestMatch(const uint8_t * __restrict data,
340
383
  const size_t ring_buffer_mask,
341
384
  const int* __restrict distance_cache,
342
- const uint32_t cur_ix,
343
- const int max_length,
344
- const uint32_t max_backward,
345
- int * __restrict best_len_out,
346
- int * __restrict best_len_code_out,
347
- int * __restrict best_distance_out,
385
+ const size_t cur_ix,
386
+ const size_t max_length,
387
+ const size_t max_backward,
388
+ size_t * __restrict best_len_out,
389
+ size_t * __restrict best_len_code_out,
390
+ size_t * __restrict best_distance_out,
348
391
  double * __restrict best_score_out) {
349
392
  *best_len_code_out = 0;
350
393
  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
351
394
  bool match_found = false;
352
395
  // Don't accept a short copy from far away.
353
396
  double best_score = *best_score_out;
354
- int best_len = *best_len_out;
397
+ size_t best_len = *best_len_out;
355
398
  *best_len_out = 0;
356
399
  // Try last distance first.
357
- for (int i = 0; i < kNumLastDistancesToCheck; ++i) {
358
- const int idx = kDistanceCacheIndex[i];
359
- const int backward = distance_cache[idx] + kDistanceCacheOffset[i];
360
- uint32_t prev_ix = cur_ix - backward;
400
+ for (size_t i = 0; i < kNumLastDistancesToCheck; ++i) {
401
+ const size_t idx = kDistanceCacheIndex[i];
402
+ const size_t backward =
403
+ static_cast<size_t>(distance_cache[idx] + kDistanceCacheOffset[i]);
404
+ size_t prev_ix = static_cast<size_t>(cur_ix - backward);
361
405
  if (prev_ix >= cur_ix) {
362
406
  continue;
363
407
  }
364
- if (PREDICT_FALSE(backward > (int)max_backward)) {
408
+ if (PREDICT_FALSE(backward > max_backward)) {
365
409
  continue;
366
410
  }
367
- prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
411
+ prev_ix &= ring_buffer_mask;
368
412
 
369
413
  if (cur_ix_masked + best_len > ring_buffer_mask ||
370
414
  prev_ix + best_len > ring_buffer_mask ||
371
415
  data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
372
416
  continue;
373
417
  }
374
- const int len =
375
- FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
376
- max_length);
418
+ const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
419
+ &data[cur_ix_masked],
420
+ max_length);
377
421
  if (len >= 3 || (len == 2 && i < 2)) {
378
422
  // Comparing for >= 2 does not change the semantics, but just saves for
379
423
  // a few unnecessary binary logarithms in backward reference score,
@@ -392,22 +436,23 @@ class HashLongestMatch {
392
436
  }
393
437
  const uint32_t key = HashBytes(&data[cur_ix_masked]);
394
438
  const uint32_t * __restrict const bucket = &buckets_[key][0];
395
- const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
396
- for (int i = num_[key] - 1; i >= down; --i) {
397
- uint32_t prev_ix = bucket[i & kBlockMask];
398
- const uint32_t backward = cur_ix - prev_ix;
439
+ const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
440
+ for (size_t i = num_[key]; i > down;) {
441
+ --i;
442
+ size_t prev_ix = bucket[i & kBlockMask];
443
+ const size_t backward = cur_ix - prev_ix;
399
444
  if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
400
445
  break;
401
446
  }
402
- prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
447
+ prev_ix &= ring_buffer_mask;
403
448
  if (cur_ix_masked + best_len > ring_buffer_mask ||
404
449
  prev_ix + best_len > ring_buffer_mask ||
405
450
  data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
406
451
  continue;
407
452
  }
408
- const int len =
409
- FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
410
- max_length);
453
+ const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
454
+ &data[cur_ix_masked],
455
+ max_length);
411
456
  if (len >= 4) {
412
457
  // Comparing for >= 3 does not change the semantics, but just saves
413
458
  // for a few unnecessary binary logarithms in backward reference
@@ -424,25 +469,28 @@ class HashLongestMatch {
424
469
  }
425
470
  }
426
471
  }
472
+ buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
473
+ ++num_[key];
427
474
  if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
428
- uint32_t dict_key = Hash<14>(&data[cur_ix_masked]) << 1;
475
+ size_t dict_key = Hash<14>(&data[cur_ix_masked]) << 1;
429
476
  for (int k = 0; k < 2; ++k, ++dict_key) {
430
477
  ++num_dict_lookups_;
431
478
  const uint16_t v = kStaticDictionaryHash[dict_key];
432
479
  if (v > 0) {
433
- const int len = v & 31;
434
- const int dist = v >> 5;
435
- const int offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
480
+ const size_t len = v & 31;
481
+ const size_t dist = v >> 5;
482
+ const size_t offset =
483
+ kBrotliDictionaryOffsetsByLength[len] + len * dist;
436
484
  if (len <= max_length) {
437
- const int matchlen =
485
+ const size_t matchlen =
438
486
  FindMatchLengthWithLimit(&data[cur_ix_masked],
439
487
  &kBrotliDictionary[offset], len);
440
- if (matchlen > len - kCutoffTransformsCount && matchlen > 0) {
441
- const int transform_id = kCutoffTransforms[len - matchlen];
442
- const int word_id =
488
+ if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
489
+ const size_t transform_id = kCutoffTransforms[len - matchlen];
490
+ const size_t word_id =
443
491
  transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
444
492
  dist;
445
- const int backward = max_backward + word_id + 1;
493
+ const size_t backward = max_backward + word_id + 1;
446
494
  double score = BackwardReferenceScore(matchlen, backward);
447
495
  if (best_score < score) {
448
496
  ++num_dict_matches_;
@@ -462,28 +510,25 @@ class HashLongestMatch {
462
510
  return match_found;
463
511
  }
464
512
 
465
- // Similar to FindLongestMatch(), but finds all matches.
513
+ // Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
514
+ // length of max_length and stores the position cur_ix in the hash table.
466
515
  //
467
516
  // Sets *num_matches to the number of matches found, and stores the found
468
- // matches in matches[0] to matches[*num_matches - 1].
469
- //
470
- // If the longest match is longer than kMaxZopfliLen, returns only this
471
- // longest match.
472
- //
473
- // Requires that at least kMaxZopfliLen space is available in matches.
474
- void FindAllMatches(const uint8_t* data,
475
- const size_t ring_buffer_mask,
476
- const uint32_t cur_ix,
477
- const int max_length,
478
- const uint32_t max_backward,
479
- int* num_matches,
480
- BackwardMatch* matches) const {
517
+ // matches in matches[0] to matches[*num_matches - 1]. The matches will be
518
+ // sorted by strictly increasing length and (non-strictly) increasing
519
+ // distance.
520
+ size_t FindAllMatches(const uint8_t* data,
521
+ const size_t ring_buffer_mask,
522
+ const size_t cur_ix,
523
+ const size_t max_length,
524
+ const size_t max_backward,
525
+ BackwardMatch* matches) {
481
526
  BackwardMatch* const orig_matches = matches;
482
527
  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
483
- int best_len = 1;
484
- int stop = static_cast<int>(cur_ix) - 64;
485
- if (stop < 0) { stop = 0; }
486
- for (int i = cur_ix - 1; i > stop && best_len <= 2; --i) {
528
+ size_t best_len = 1;
529
+ size_t stop = cur_ix - 64;
530
+ if (cur_ix < 64) { stop = 0; }
531
+ for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {
487
532
  size_t prev_ix = i;
488
533
  const size_t backward = cur_ix - prev_ix;
489
534
  if (PREDICT_FALSE(backward > max_backward)) {
@@ -494,57 +539,57 @@ class HashLongestMatch {
494
539
  data[cur_ix_masked + 1] != data[prev_ix + 1]) {
495
540
  continue;
496
541
  }
497
- const int len =
542
+ const size_t len =
498
543
  FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
499
544
  max_length);
500
545
  if (len > best_len) {
501
546
  best_len = len;
502
- if (len > kMaxZopfliLen) {
503
- matches = orig_matches;
504
- }
505
- *matches++ = BackwardMatch(static_cast<int>(backward), len);
547
+ *matches++ = BackwardMatch(backward, len);
506
548
  }
507
549
  }
508
550
  const uint32_t key = HashBytes(&data[cur_ix_masked]);
509
551
  const uint32_t * __restrict const bucket = &buckets_[key][0];
510
- const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
511
- for (int i = num_[key] - 1; i >= down; --i) {
512
- uint32_t prev_ix = bucket[i & kBlockMask];
513
- const uint32_t backward = cur_ix - prev_ix;
552
+ const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
553
+ for (size_t i = num_[key]; i > down;) {
554
+ --i;
555
+ size_t prev_ix = bucket[i & kBlockMask];
556
+ const size_t backward = cur_ix - prev_ix;
514
557
  if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
515
558
  break;
516
559
  }
517
- prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
560
+ prev_ix &= ring_buffer_mask;
518
561
  if (cur_ix_masked + best_len > ring_buffer_mask ||
519
562
  prev_ix + best_len > ring_buffer_mask ||
520
563
  data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
521
564
  continue;
522
565
  }
523
- const int len =
566
+ const size_t len =
524
567
  FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
525
568
  max_length);
526
569
  if (len > best_len) {
527
570
  best_len = len;
528
- if (len > kMaxZopfliLen) {
529
- matches = orig_matches;
530
- }
531
571
  *matches++ = BackwardMatch(backward, len);
532
572
  }
533
573
  }
534
- std::vector<int> dict_matches(kMaxDictionaryMatchLen + 1, kInvalidMatch);
535
- int minlen = std::max<int>(4, best_len + 1);
574
+ buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
575
+ ++num_[key];
576
+ uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
577
+ for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
578
+ dict_matches[i] = kInvalidMatch;
579
+ }
580
+ size_t minlen = std::max<size_t>(4, best_len + 1);
536
581
  if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
537
582
  &dict_matches[0])) {
538
- int maxlen = std::min<int>(kMaxDictionaryMatchLen, max_length);
539
- for (int l = minlen; l <= maxlen; ++l) {
540
- int dict_id = dict_matches[l];
583
+ size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);
584
+ for (size_t l = minlen; l <= maxlen; ++l) {
585
+ uint32_t dict_id = dict_matches[l];
541
586
  if (dict_id < kInvalidMatch) {
542
587
  *matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
543
588
  dict_id & 31);
544
589
  }
545
590
  }
546
591
  }
547
- *num_matches += static_cast<int>(matches - orig_matches);
592
+ return static_cast<size_t>(matches - orig_matches);
548
593
  }
549
594
 
550
595
  enum { kHashLength = 4 };
@@ -560,6 +605,10 @@ class HashLongestMatch {
560
605
  return h >> (32 - kBucketBits);
561
606
  }
562
607
 
608
+ enum { kHashMapSize = 2 << kBucketBits };
609
+
610
+ static const size_t kMaxNumMatches = 64 + (1 << kBlockBits);
611
+
563
612
  private:
564
613
  // Number of hash buckets.
565
614
  static const uint32_t kBucketSize = 1 << kBucketBits;
@@ -577,29 +626,281 @@ class HashLongestMatch {
577
626
  // Buckets containing kBlockSize of backward references.
578
627
  uint32_t buckets_[kBucketSize][kBlockSize];
579
628
 
629
+ // True if num_ array needs to be initialized.
630
+ bool need_init_;
631
+
580
632
  size_t num_dict_lookups_;
581
633
  size_t num_dict_matches_;
582
634
  };
583
635
 
636
+ // A (forgetful) hash table where each hash bucket contains a binary tree of
637
+ // sequences whose first 4 bytes share the same hash code.
638
+ // Each sequence is kMaxTreeCompLength long and is identified by its starting
639
+ // position in the input data. The binary tree is sorted by the lexicographic
640
+ // order of the sequences, and it is also a max-heap with respect to the
641
+ // starting positions.
642
+ class HashToBinaryTree {
643
+ public:
644
+ HashToBinaryTree() : forest_(NULL) {
645
+ Reset();
646
+ }
647
+
648
+ ~HashToBinaryTree() {
649
+ delete[] forest_;
650
+ }
651
+
652
+ void Reset() {
653
+ need_init_ = true;
654
+ }
655
+
656
+ void Init(int lgwin, size_t position, size_t bytes, bool is_last) {
657
+ if (need_init_) {
658
+ window_mask_ = (1u << lgwin) - 1u;
659
+ invalid_pos_ = static_cast<uint32_t>(-window_mask_);
660
+ for (uint32_t i = 0; i < kBucketSize; i++) {
661
+ buckets_[i] = invalid_pos_;
662
+ }
663
+ size_t num_nodes = (position == 0 && is_last) ? bytes : window_mask_ + 1;
664
+ forest_ = new uint32_t[2 * num_nodes];
665
+ need_init_ = false;
666
+ }
667
+ }
668
+
669
+ // Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
670
+ // length of max_length and stores the position cur_ix in the hash table.
671
+ //
672
+ // Sets *num_matches to the number of matches found, and stores the found
673
+ // matches in matches[0] to matches[*num_matches - 1]. The matches will be
674
+ // sorted by strictly increasing length and (non-strictly) increasing
675
+ // distance.
676
+ size_t FindAllMatches(const uint8_t* data,
677
+ const size_t ring_buffer_mask,
678
+ const size_t cur_ix,
679
+ const size_t max_length,
680
+ const size_t max_backward,
681
+ BackwardMatch* matches) {
682
+ BackwardMatch* const orig_matches = matches;
683
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
684
+ size_t best_len = 1;
685
+ size_t stop = cur_ix - 64;
686
+ if (cur_ix < 64) { stop = 0; }
687
+ for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {
688
+ size_t prev_ix = i;
689
+ const size_t backward = cur_ix - prev_ix;
690
+ if (PREDICT_FALSE(backward > max_backward)) {
691
+ break;
692
+ }
693
+ prev_ix &= ring_buffer_mask;
694
+ if (data[cur_ix_masked] != data[prev_ix] ||
695
+ data[cur_ix_masked + 1] != data[prev_ix + 1]) {
696
+ continue;
697
+ }
698
+ const size_t len =
699
+ FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
700
+ max_length);
701
+ if (len > best_len) {
702
+ best_len = len;
703
+ *matches++ = BackwardMatch(backward, len);
704
+ }
705
+ }
706
+ if (best_len < max_length) {
707
+ matches = StoreAndFindMatches(data, cur_ix, ring_buffer_mask,
708
+ max_length, &best_len, matches);
709
+ }
710
+ uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
711
+ for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
712
+ dict_matches[i] = kInvalidMatch;
713
+ }
714
+ size_t minlen = std::max<size_t>(4, best_len + 1);
715
+ if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
716
+ &dict_matches[0])) {
717
+ size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);
718
+ for (size_t l = minlen; l <= maxlen; ++l) {
719
+ uint32_t dict_id = dict_matches[l];
720
+ if (dict_id < kInvalidMatch) {
721
+ *matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
722
+ dict_id & 31);
723
+ }
724
+ }
725
+ }
726
+ return static_cast<size_t>(matches - orig_matches);
727
+ }
728
+
729
+ // Stores the hash of the next 4 bytes and re-roots the binary tree at the
730
+ // current sequence, without returning any matches.
731
+ // REQUIRES: cur_ix + kMaxTreeCompLength <= end-of-current-block
732
+ void Store(const uint8_t* data,
733
+ const size_t ring_buffer_mask,
734
+ const size_t cur_ix) {
735
+ size_t best_len = 0;
736
+ StoreAndFindMatches(data, cur_ix, ring_buffer_mask, kMaxTreeCompLength,
737
+ &best_len, NULL);
738
+ }
739
+
740
+ void StitchToPreviousBlock(size_t num_bytes,
741
+ size_t position,
742
+ const uint8_t* ringbuffer,
743
+ size_t ringbuffer_mask) {
744
+ if (num_bytes >= 3 && position >= kMaxTreeCompLength) {
745
+ // Store the last `kMaxTreeCompLength - 1` positions in the hasher.
746
+ // These could not be calculated before, since they require knowledge
747
+ // of both the previous and the current block.
748
+ const size_t i_start = position - kMaxTreeCompLength + 1;
749
+ const size_t i_end = std::min(position, i_start + num_bytes);
750
+ for (size_t i = i_start; i < i_end; ++i) {
751
+ // We know that i + kMaxTreeCompLength <= position + num_bytes, i.e. the
752
+ // end of the current block and that we have at least
753
+ // kMaxTreeCompLength tail in the ringbuffer.
754
+ Store(ringbuffer, ringbuffer_mask, i);
755
+ }
756
+ }
757
+ }
758
+
759
+ static const size_t kMaxNumMatches = 64 + kMaxTreeSearchDepth;
760
+
761
+ private:
762
+ // Stores the hash of the next 4 bytes and in a single tree-traversal, the
763
+ // hash bucket's binary tree is searched for matches and is re-rooted at the
764
+ // current position.
765
+ //
766
+ // If less than kMaxTreeCompLength data is available, the hash bucket of the
767
+ // current position is searched for matches, but the state of the hash table
768
+ // is not changed, since we can not know the final sorting order of the
769
+ // current (incomplete) sequence.
770
+ //
771
+ // This function must be called with increasing cur_ix positions.
772
+ BackwardMatch* StoreAndFindMatches(const uint8_t* const __restrict data,
773
+ const size_t cur_ix,
774
+ const size_t ring_buffer_mask,
775
+ const size_t max_length,
776
+ size_t* const __restrict best_len,
777
+ BackwardMatch* __restrict matches) {
778
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
779
+ const size_t max_backward = window_mask_ - 15;
780
+ const size_t max_comp_len = std::min(max_length, kMaxTreeCompLength);
781
+ const bool reroot_tree = max_length >= kMaxTreeCompLength;
782
+ const uint32_t key = HashBytes(&data[cur_ix_masked]);
783
+ size_t prev_ix = buckets_[key];
784
+ // The forest index of the rightmost node of the left subtree of the new
785
+ // root, updated as we traverse and reroot the tree of the hash bucket.
786
+ size_t node_left = LeftChildIndex(cur_ix);
787
+ // The forest index of the leftmost node of the right subtree of the new
788
+ // root, updated as we traverse and reroot the tree of the hash bucket.
789
+ size_t node_right = RightChildIndex(cur_ix);
790
+ // The match length of the rightmost node of the left subtree of the new
791
+ // root, updated as we traverse and reroot the tree of the hash bucket.
792
+ size_t best_len_left = 0;
793
+ // The match length of the leftmost node of the right subtree of the new
794
+ // root, updated as we traverse and reroot the tree of the hash bucket.
795
+ size_t best_len_right = 0;
796
+ if (reroot_tree) {
797
+ buckets_[key] = static_cast<uint32_t>(cur_ix);
798
+ }
799
+ for (size_t depth_remaining = kMaxTreeSearchDepth; ; --depth_remaining) {
800
+ const size_t backward = cur_ix - prev_ix;
801
+ const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
802
+ if (backward == 0 || backward > max_backward || depth_remaining == 0) {
803
+ if (reroot_tree) {
804
+ forest_[node_left] = invalid_pos_;
805
+ forest_[node_right] = invalid_pos_;
806
+ }
807
+ break;
808
+ }
809
+ const size_t cur_len = std::min(best_len_left, best_len_right);
810
+ const size_t len = cur_len +
811
+ FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],
812
+ &data[prev_ix_masked + cur_len],
813
+ max_length - cur_len);
814
+ if (len > *best_len) {
815
+ *best_len = len;
816
+ if (matches) {
817
+ *matches++ = BackwardMatch(backward, len);
818
+ }
819
+ if (len >= max_comp_len) {
820
+ if (reroot_tree) {
821
+ forest_[node_left] = forest_[LeftChildIndex(prev_ix)];
822
+ forest_[node_right] = forest_[RightChildIndex(prev_ix)];
823
+ }
824
+ break;
825
+ }
826
+ }
827
+ if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {
828
+ best_len_left = len;
829
+ if (reroot_tree) {
830
+ forest_[node_left] = static_cast<uint32_t>(prev_ix);
831
+ }
832
+ node_left = RightChildIndex(prev_ix);
833
+ prev_ix = forest_[node_left];
834
+ } else {
835
+ best_len_right = len;
836
+ if (reroot_tree) {
837
+ forest_[node_right] = static_cast<uint32_t>(prev_ix);
838
+ }
839
+ node_right = LeftChildIndex(prev_ix);
840
+ prev_ix = forest_[node_right];
841
+ }
842
+ }
843
+ return matches;
844
+ }
845
+
846
+ inline size_t LeftChildIndex(const size_t pos) {
847
+ return 2 * (pos & window_mask_);
848
+ }
849
+
850
+ inline size_t RightChildIndex(const size_t pos) {
851
+ return 2 * (pos & window_mask_) + 1;
852
+ }
853
+
854
+ static uint32_t HashBytes(const uint8_t *data) {
855
+ uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
856
+ // The higher bits contain more mixture from the multiplication,
857
+ // so we take our results from there.
858
+ return h >> (32 - kBucketBits);
859
+ }
860
+
861
+ static const int kBucketBits = 17;
862
+ static const size_t kBucketSize = 1 << kBucketBits;
863
+
864
+ // The window size minus 1
865
+ size_t window_mask_;
866
+
867
+ // Hash table that maps the 4-byte hashes of the sequence to the last
868
+ // position where this hash was found, which is the root of the binary
869
+ // tree of sequences that share this hash bucket.
870
+ uint32_t buckets_[kBucketSize];
871
+
872
+ // The union of the binary trees of each hash bucket. The root of the tree
873
+ // corresponding to a hash is a sequence starting at buckets_[hash] and
874
+ // the left and right children of a sequence starting at pos are
875
+ // forest_[2 * pos] and forest_[2 * pos + 1].
876
+ uint32_t* forest_;
877
+
878
+ // A position used to mark a non-existent sequence, i.e. a tree is empty if
879
+ // its root is at invalid_pos_ and a node is a leaf if both its children
880
+ // are at invalid_pos_.
881
+ uint32_t invalid_pos_;
882
+
883
+ bool need_init_;
884
+ };
885
+
584
886
  struct Hashers {
585
887
  // For kBucketSweep == 1, enabling the dictionary lookup makes compression
586
888
  // a little faster (0.5% - 1%) and it compresses 0.15% better on small text
587
889
  // and html inputs.
588
- typedef HashLongestMatchQuickly<16, 1, true> H1;
589
- typedef HashLongestMatchQuickly<16, 2, false> H2;
590
- typedef HashLongestMatchQuickly<16, 4, false> H3;
890
+ typedef HashLongestMatchQuickly<16, 1, true> H2;
891
+ typedef HashLongestMatchQuickly<16, 2, false> H3;
591
892
  typedef HashLongestMatchQuickly<17, 4, true> H4;
592
893
  typedef HashLongestMatch<14, 4, 4> H5;
593
894
  typedef HashLongestMatch<14, 5, 4> H6;
594
895
  typedef HashLongestMatch<15, 6, 10> H7;
595
896
  typedef HashLongestMatch<15, 7, 10> H8;
596
897
  typedef HashLongestMatch<15, 8, 16> H9;
898
+ typedef HashToBinaryTree H10;
597
899
 
598
- Hashers() : hash_h1(0), hash_h2(0), hash_h3(0), hash_h4(0), hash_h5(0),
599
- hash_h6(0), hash_h7(0), hash_h8(0), hash_h9(0) {}
900
+ Hashers(void) : hash_h2(0), hash_h3(0), hash_h4(0), hash_h5(0),
901
+ hash_h6(0), hash_h7(0), hash_h8(0), hash_h9(0), hash_h10(0) {}
600
902
 
601
- ~Hashers() {
602
- delete hash_h1;
903
+ ~Hashers(void) {
603
904
  delete hash_h2;
604
905
  delete hash_h3;
605
906
  delete hash_h4;
@@ -608,11 +909,11 @@ struct Hashers {
608
909
  delete hash_h7;
609
910
  delete hash_h8;
610
911
  delete hash_h9;
912
+ delete hash_h10;
611
913
  }
612
914
 
613
915
  void Init(int type) {
614
916
  switch (type) {
615
- case 1: hash_h1 = new H1; break;
616
917
  case 2: hash_h2 = new H2; break;
617
918
  case 3: hash_h3 = new H3; break;
618
919
  case 4: hash_h4 = new H4; break;
@@ -621,12 +922,14 @@ struct Hashers {
621
922
  case 7: hash_h7 = new H7; break;
622
923
  case 8: hash_h8 = new H8; break;
623
924
  case 9: hash_h9 = new H9; break;
925
+ case 10: hash_h10 = new H10; break;
624
926
  default: break;
625
927
  }
626
928
  }
627
929
 
628
930
  template<typename Hasher>
629
931
  void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {
932
+ hasher->Init();
630
933
  for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) {
631
934
  hasher->Store(&dict[i], static_cast<uint32_t>(i));
632
935
  }
@@ -634,9 +937,8 @@ struct Hashers {
634
937
 
635
938
  // Custom LZ77 window.
636
939
  void PrependCustomDictionary(
637
- int type, const size_t size, const uint8_t* dict) {
940
+ int type, int lgwin, const size_t size, const uint8_t* dict) {
638
941
  switch (type) {
639
- case 1: WarmupHash(size, dict, hash_h1); break;
640
942
  case 2: WarmupHash(size, dict, hash_h2); break;
641
943
  case 3: WarmupHash(size, dict, hash_h3); break;
642
944
  case 4: WarmupHash(size, dict, hash_h4); break;
@@ -645,12 +947,17 @@ struct Hashers {
645
947
  case 7: WarmupHash(size, dict, hash_h7); break;
646
948
  case 8: WarmupHash(size, dict, hash_h8); break;
647
949
  case 9: WarmupHash(size, dict, hash_h9); break;
950
+ case 10:
951
+ hash_h10->Init(lgwin, 0, size, false);
952
+ for (size_t i = 0; i + kMaxTreeCompLength - 1 < size; ++i) {
953
+ hash_h10->Store(dict, std::numeric_limits<size_t>::max(), i);
954
+ }
955
+ break;
648
956
  default: break;
649
957
  }
650
958
  }
651
959
 
652
960
 
653
- H1* hash_h1;
654
961
  H2* hash_h2;
655
962
  H3* hash_h3;
656
963
  H4* hash_h4;
@@ -659,6 +966,7 @@ struct Hashers {
659
966
  H7* hash_h7;
660
967
  H8* hash_h8;
661
968
  H9* hash_h9;
969
+ H10* hash_h10;
662
970
  };
663
971
 
664
972
  } // namespace brotli