brotli 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/brotli/brotli.cc +114 -24
- data/ext/brotli/brotli.h +0 -1
- data/ext/brotli/extconf.rb +30 -23
- data/lib/brotli/version.rb +1 -1
- data/vendor/brotli/LICENSE +1 -1
- data/vendor/brotli/dec/Makefile +1 -1
- data/vendor/brotli/dec/bit_reader.c +3 -3
- data/vendor/brotli/dec/bit_reader.h +25 -27
- data/vendor/brotli/dec/context.h +4 -4
- data/vendor/brotli/dec/decode.c +410 -486
- data/vendor/brotli/dec/decode.h +101 -105
- data/vendor/brotli/dec/dictionary.c +1 -1
- data/vendor/brotli/dec/dictionary.h +7 -8
- data/vendor/brotli/dec/huffman.c +103 -105
- data/vendor/brotli/dec/huffman.h +18 -18
- data/vendor/brotli/dec/port.h +52 -40
- data/vendor/brotli/dec/prefix.h +2 -0
- data/vendor/brotli/dec/state.c +13 -19
- data/vendor/brotli/dec/state.h +25 -39
- data/vendor/brotli/dec/transform.h +38 -44
- data/vendor/brotli/dec/types.h +2 -2
- data/vendor/brotli/enc/Makefile +1 -1
- data/vendor/brotli/enc/backward_references.cc +455 -359
- data/vendor/brotli/enc/backward_references.h +79 -3
- data/vendor/brotli/enc/bit_cost.h +54 -32
- data/vendor/brotli/enc/block_splitter.cc +285 -193
- data/vendor/brotli/enc/block_splitter.h +4 -12
- data/vendor/brotli/enc/brotli_bit_stream.cc +623 -324
- data/vendor/brotli/enc/brotli_bit_stream.h +76 -37
- data/vendor/brotli/enc/cluster.h +161 -120
- data/vendor/brotli/enc/command.h +60 -37
- data/vendor/brotli/enc/compress_fragment.cc +701 -0
- data/vendor/brotli/enc/compress_fragment.h +47 -0
- data/vendor/brotli/enc/compress_fragment_two_pass.cc +524 -0
- data/vendor/brotli/enc/compress_fragment_two_pass.h +40 -0
- data/vendor/brotli/enc/compressor.h +15 -0
- data/vendor/brotli/enc/context.h +1 -1
- data/vendor/brotli/enc/dictionary.h +2 -2
- data/vendor/brotli/enc/encode.cc +819 -286
- data/vendor/brotli/enc/encode.h +38 -15
- data/vendor/brotli/enc/encode_parallel.cc +40 -42
- data/vendor/brotli/enc/entropy_encode.cc +144 -147
- data/vendor/brotli/enc/entropy_encode.h +32 -8
- data/vendor/brotli/enc/entropy_encode_static.h +572 -0
- data/vendor/brotli/enc/fast_log.h +7 -40
- data/vendor/brotli/enc/find_match_length.h +9 -9
- data/vendor/brotli/enc/hash.h +462 -154
- data/vendor/brotli/enc/histogram.cc +6 -6
- data/vendor/brotli/enc/histogram.h +13 -13
- data/vendor/brotli/enc/literal_cost.cc +45 -45
- data/vendor/brotli/enc/metablock.cc +92 -89
- data/vendor/brotli/enc/metablock.h +12 -12
- data/vendor/brotli/enc/port.h +7 -16
- data/vendor/brotli/enc/prefix.h +23 -22
- data/vendor/brotli/enc/ringbuffer.h +75 -29
- data/vendor/brotli/enc/static_dict.cc +56 -48
- data/vendor/brotli/enc/static_dict.h +5 -5
- data/vendor/brotli/enc/streams.cc +1 -1
- data/vendor/brotli/enc/streams.h +5 -5
- data/vendor/brotli/enc/transform.h +40 -35
- data/vendor/brotli/enc/types.h +2 -0
- data/vendor/brotli/enc/utf8_util.cc +3 -2
- data/vendor/brotli/enc/write_bits.h +6 -6
- metadata +9 -5
- data/vendor/brotli/dec/streams.c +0 -102
- data/vendor/brotli/dec/streams.h +0 -95
data/vendor/brotli/enc/hash.h
CHANGED
@@ -10,12 +10,10 @@
 #ifndef BROTLI_ENC_HASH_H_
 #define BROTLI_ENC_HASH_H_

-#include <string.h>
 #include <sys/types.h>
 #include <algorithm>
-#include <
-#include <
-#include <string>
+#include <cstring>
+#include <limits>

 #include "./dictionary_hash.h"
 #include "./fast_log.h"
@@ -28,15 +26,20 @@

 namespace brotli {

-static const
+static const size_t kMaxTreeSearchDepth = 64;
+static const size_t kMaxTreeCompLength = 128;
+
+static const uint32_t kDistanceCacheIndex[] = {
   0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
 };
 static const int kDistanceCacheOffset[] = {
   0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
 };

-static const
-static const
+static const uint32_t kCutoffTransformsCount = 10;
+static const uint8_t kCutoffTransforms[] = {
+  0, 12, 27, 23, 42, 63, 56, 48, 59, 64
+};

 // kHashMul32 multiplier has these properties:
 // * The multiplier must be odd. Otherwise we may lose the highest bit.
@@ -68,41 +71,47 @@ inline uint32_t Hash(const uint8_t *data) {
 // This function is used to sometimes discard a longer backward reference
 // when it is not much longer and the bit cost for encoding it is more
 // than the saved literals.
-
-
-
+//
+// backward_reference_offset MUST be positive.
+inline double BackwardReferenceScore(size_t copy_length,
+                                     size_t backward_reference_offset) {
+  return 5.4 * static_cast<double>(copy_length) -
+      1.20 * Log2FloorNonZero(backward_reference_offset);
 }

-inline double BackwardReferenceScoreUsingLastDistance(
-
+inline double BackwardReferenceScoreUsingLastDistance(size_t copy_length,
+                                                      size_t distance_short_code) {
   static const double kDistanceShortCodeBitCost[16] = {
     -0.6, 0.95, 1.17, 1.27,
     0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
     1.05, 1.05, 1.15, 1.15, 1.25, 1.25
   };
-  return 5.4 * copy_length -
+  return 5.4 * static_cast<double>(copy_length) -
+      kDistanceShortCodeBitCost[distance_short_code];
 }

 struct BackwardMatch {
-  BackwardMatch() : distance(0), length_and_code(0) {}
+  BackwardMatch(void) : distance(0), length_and_code(0) {}

-  BackwardMatch(
-      : distance(dist)
+  BackwardMatch(size_t dist, size_t len)
+      : distance(static_cast<uint32_t>(dist))
+      , length_and_code(static_cast<uint32_t>(len << 5)) {}

-  BackwardMatch(
-      : distance(dist)
-
+  BackwardMatch(size_t dist, size_t len, size_t len_code)
+      : distance(static_cast<uint32_t>(dist))
+      , length_and_code(static_cast<uint32_t>(
+            (len << 5) | (len == len_code ? 0 : len_code))) {}

-
+  size_t length(void) const {
     return length_and_code >> 5;
   }
-
-
+  size_t length_code(void) const {
+    size_t code = length_and_code & 31;
     return code ? code : length();
   }

-
-
+  uint32_t distance;
+  uint32_t length_and_code;
 };

 // A (forgetful) hash table to the data seen by the compressor, to
@@ -113,18 +122,31 @@ struct BackwardMatch {
 template <int kBucketBits, int kBucketSweep, bool kUseDictionary>
 class HashLongestMatchQuickly {
  public:
-  HashLongestMatchQuickly() {
+  HashLongestMatchQuickly(void) {
     Reset();
   }
-  void Reset() {
-
-    // not filling will make the results of the compression stochastic
-    // (but correct). This is because random data would cause the
-    // system to find accidentally good backward references here and there.
-    memset(&buckets_[0], 0, sizeof(buckets_));
+  void Reset(void) {
+    need_init_ = true;
     num_dict_lookups_ = 0;
     num_dict_matches_ = 0;
   }
+  void Init(void) {
+    if (need_init_) {
+      // It is not strictly necessary to fill this buffer here, but
+      // not filling will make the results of the compression stochastic
+      // (but correct). This is because random data would cause the
+      // system to find accidentally good backward references here and there.
+      memset(&buckets_[0], 0, sizeof(buckets_));
+      need_init_ = false;
+    }
+  }
+  void InitForData(const uint8_t* data, size_t num) {
+    for (size_t i = 0; i < num; ++i) {
+      const uint32_t key = HashBytes(&data[i]);
+      memset(&buckets_[key], 0, kBucketSweep * sizeof(buckets_[0]));
+      need_init_ = false;
+    }
+  }
   // Look at 4 bytes at data.
   // Compute a hash from these, and store the value somewhere within
   // [ix .. ix+3].
@@ -136,7 +158,8 @@ class HashLongestMatchQuickly {
   }

   // Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
-  // up to the length of max_length
+  // up to the length of max_length and stores the position cur_ix in the
+  // hash table.
   //
   // Does not look for matches longer than max_length.
   // Does not look for matches further away than max_backward.
@@ -146,27 +169,28 @@ class HashLongestMatchQuickly {
   inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer,
                                const size_t ring_buffer_mask,
                                const int* __restrict distance_cache,
-                               const
-                               const
-                               const
-
-
-
+                               const size_t cur_ix,
+                               const size_t max_length,
+                               const size_t max_backward,
+                               size_t * __restrict best_len_out,
+                               size_t * __restrict best_len_code_out,
+                               size_t * __restrict best_distance_out,
                                double* __restrict best_score_out) {
-    const
+    const size_t best_len_in = *best_len_out;
     const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+    const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]);
     int compare_char = ring_buffer[cur_ix_masked + best_len_in];
     double best_score = *best_score_out;
-
-
-
+    size_t best_len = best_len_in;
+    size_t cached_backward = static_cast<size_t>(distance_cache[0]);
+    size_t prev_ix = cur_ix - cached_backward;
     bool match_found = false;
     if (prev_ix < cur_ix) {
       prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
       if (compare_char == ring_buffer[prev_ix + best_len]) {
-
-
-
+        size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
+                                              &ring_buffer[cur_ix_masked],
+                                              max_length);
         if (len >= 4) {
           best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
           best_len = len;
@@ -176,6 +200,7 @@ class HashLongestMatchQuickly {
           *best_score_out = best_score;
           compare_char = ring_buffer[cur_ix_masked + best_len];
           if (kBucketSweep == 1) {
+            buckets_[key] = static_cast<uint32_t>(cur_ix);
             return true;
           } else {
             match_found = true;
@@ -183,11 +208,11 @@ class HashLongestMatchQuickly {
         }
       }
     }
-    const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]);
     if (kBucketSweep == 1) {
       // Only one to look for, don't bother to prepare for a loop.
       prev_ix = buckets_[key];
-
+      buckets_[key] = static_cast<uint32_t>(cur_ix);
+      size_t backward = cur_ix - prev_ix;
       prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
       if (compare_char != ring_buffer[prev_ix + best_len_in]) {
         return false;
@@ -195,9 +220,9 @@ class HashLongestMatchQuickly {
       if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
         return false;
       }
-      const
-
-
+      const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
+                                                  &ring_buffer[cur_ix_masked],
+                                                  max_length);
       if (len >= 4) {
         *best_len_out = len;
         *best_len_code_out = len;
@@ -209,7 +234,7 @@ class HashLongestMatchQuickly {
       uint32_t *bucket = buckets_ + key;
       prev_ix = *bucket++;
       for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {
-        const
+        const size_t backward = cur_ix - prev_ix;
         prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
         if (compare_char != ring_buffer[prev_ix + best_len]) {
           continue;
@@ -217,10 +242,9 @@ class HashLongestMatchQuickly {
        if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
          continue;
        }
-        const
-
-
-                                 max_length);
+        const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
+                                                    &ring_buffer[cur_ix_masked],
+                                                    max_length);
        if (len >= 4) {
          const double score = BackwardReferenceScore(len, backward);
          if (best_score < score) {
@@ -242,19 +266,20 @@ class HashLongestMatchQuickly {
       const uint32_t dict_key = Hash<14>(&ring_buffer[cur_ix_masked]) << 1;
       const uint16_t v = kStaticDictionaryHash[dict_key];
       if (v > 0) {
-        const
-        const
-        const
+        const uint32_t len = v & 31;
+        const uint32_t dist = v >> 5;
+        const size_t offset =
+            kBrotliDictionaryOffsetsByLength[len] + len * dist;
         if (len <= max_length) {
-          const
+          const size_t matchlen =
               FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
                                        &kBrotliDictionary[offset], len);
-          if (matchlen > len
-            const
-            const
-                transform_id * (
+          if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
+            const size_t transform_id = kCutoffTransforms[len - matchlen];
+            const size_t word_id =
+                transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]) +
                 dist;
-            const
+            const size_t backward = max_backward + word_id + 1;
             const double score = BackwardReferenceScore(matchlen, backward);
             if (best_score < score) {
               ++num_dict_matches_;
@@ -264,12 +289,14 @@ class HashLongestMatchQuickly {
               *best_len_code_out = len;
               *best_distance_out = backward;
               *best_score_out = best_score;
-
+              match_found = true;
             }
           }
         }
       }
     }
+    const uint32_t off = (cur_ix >> 3) % kBucketSweep;
+    buckets_[key + off] = static_cast<uint32_t>(cur_ix);
     return match_found;
   }

@@ -287,16 +314,17 @@ class HashLongestMatchQuickly {
     return static_cast<uint32_t>(h >> (64 - kBucketBits));
   }

+  enum { kHashMapSize = 4 << kBucketBits };
+
  private:
   static const uint32_t kBucketSize = 1 << kBucketBits;
   uint32_t buckets_[kBucketSize + kBucketSweep];
+  // True if buckets_ array needs to be initialized.
+  bool need_init_;
   size_t num_dict_lookups_;
   size_t num_dict_matches_;
 };

-// The maximum length for which the zopflification uses distinct distances.
-static const int kMaxZopfliLen = 325;
-
 // A (forgetful) hash table to the data seen by the compressor, to
 // help create backward references to previous data.
 //
@@ -308,16 +336,31 @@ template <int kBucketBits,
           int kNumLastDistancesToCheck>
 class HashLongestMatch {
  public:
-  HashLongestMatch() {
+  HashLongestMatch(void) {
     Reset();
   }

-  void Reset() {
-
+  void Reset(void) {
+    need_init_ = true;
     num_dict_lookups_ = 0;
     num_dict_matches_ = 0;
   }

+  void Init(void) {
+    if (need_init_) {
+      memset(&num_[0], 0, sizeof(num_));
+      need_init_ = false;
+    }
+  }
+
+  void InitForData(const uint8_t* data, size_t num) {
+    for (size_t i = 0; i < num; ++i) {
+      const uint32_t key = HashBytes(&data[i]);
+      num_[key] = 0;
+      need_init_ = false;
+    }
+  }
+
   // Look at 3 bytes at data.
   // Compute a hash from these, and store the value of ix at that position.
   inline void Store(const uint8_t *data, const uint32_t ix) {
@@ -328,7 +371,7 @@ class HashLongestMatch {
   }

   // Find a longest backward match of &data[cur_ix] up to the length of
-  // max_length.
+  // max_length and stores the position cur_ix in the hash table.
   //
   // Does not look for matches longer than max_length.
   // Does not look for matches further away than max_backward.
@@ -339,41 +382,42 @@ class HashLongestMatch {
   bool FindLongestMatch(const uint8_t * __restrict data,
                         const size_t ring_buffer_mask,
                         const int* __restrict distance_cache,
-                        const
-                        const
-                        const
-
-
-
+                        const size_t cur_ix,
+                        const size_t max_length,
+                        const size_t max_backward,
+                        size_t * __restrict best_len_out,
+                        size_t * __restrict best_len_code_out,
+                        size_t * __restrict best_distance_out,
                         double * __restrict best_score_out) {
     *best_len_code_out = 0;
     const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
     bool match_found = false;
     // Don't accept a short copy from far away.
     double best_score = *best_score_out;
-
+    size_t best_len = *best_len_out;
     *best_len_out = 0;
     // Try last distance first.
-    for (
-      const
-      const
-
+    for (size_t i = 0; i < kNumLastDistancesToCheck; ++i) {
+      const size_t idx = kDistanceCacheIndex[i];
+      const size_t backward =
+          static_cast<size_t>(distance_cache[idx] + kDistanceCacheOffset[i]);
+      size_t prev_ix = static_cast<size_t>(cur_ix - backward);
       if (prev_ix >= cur_ix) {
         continue;
       }
-      if (PREDICT_FALSE(backward >
+      if (PREDICT_FALSE(backward > max_backward)) {
         continue;
       }
-      prev_ix &=
+      prev_ix &= ring_buffer_mask;

       if (cur_ix_masked + best_len > ring_buffer_mask ||
           prev_ix + best_len > ring_buffer_mask ||
           data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
         continue;
       }
-      const
-
-
+      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                  &data[cur_ix_masked],
+                                                  max_length);
       if (len >= 3 || (len == 2 && i < 2)) {
         // Comparing for >= 2 does not change the semantics, but just saves for
         // a few unnecessary binary logarithms in backward reference score,
@@ -392,22 +436,23 @@ class HashLongestMatch {
     }
     const uint32_t key = HashBytes(&data[cur_ix_masked]);
     const uint32_t * __restrict const bucket = &buckets_[key][0];
-    const
-    for (
-
-
+    const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
+    for (size_t i = num_[key]; i > down;) {
+      --i;
+      size_t prev_ix = bucket[i & kBlockMask];
+      const size_t backward = cur_ix - prev_ix;
       if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
         break;
       }
-      prev_ix &=
+      prev_ix &= ring_buffer_mask;
       if (cur_ix_masked + best_len > ring_buffer_mask ||
           prev_ix + best_len > ring_buffer_mask ||
           data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
         continue;
       }
-      const
-
-
+      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                  &data[cur_ix_masked],
+                                                  max_length);
       if (len >= 4) {
         // Comparing for >= 3 does not change the semantics, but just saves
         // for a few unnecessary binary logarithms in backward reference
@@ -424,25 +469,28 @@ class HashLongestMatch {
         }
       }
     }
+    buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
+    ++num_[key];
     if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
-
+      size_t dict_key = Hash<14>(&data[cur_ix_masked]) << 1;
       for (int k = 0; k < 2; ++k, ++dict_key) {
         ++num_dict_lookups_;
         const uint16_t v = kStaticDictionaryHash[dict_key];
         if (v > 0) {
-          const
-          const
-          const
+          const size_t len = v & 31;
+          const size_t dist = v >> 5;
+          const size_t offset =
+              kBrotliDictionaryOffsetsByLength[len] + len * dist;
           if (len <= max_length) {
-            const
+            const size_t matchlen =
                 FindMatchLengthWithLimit(&data[cur_ix_masked],
                                          &kBrotliDictionary[offset], len);
-            if (matchlen > len
-              const
-              const
+            if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
+              const size_t transform_id = kCutoffTransforms[len - matchlen];
+              const size_t word_id =
                   transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
                   dist;
-              const
+              const size_t backward = max_backward + word_id + 1;
               double score = BackwardReferenceScore(matchlen, backward);
               if (best_score < score) {
                 ++num_dict_matches_;
@@ -462,28 +510,25 @@ class HashLongestMatch {
     return match_found;
   }

-  //
+  // Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
+  // length of max_length and stores the position cur_ix in the hash table.
   //
   // Sets *num_matches to the number of matches found, and stores the found
-  // matches in matches[0] to matches[*num_matches - 1].
-  //
-  //
-
-
-
-
-
-
-                     const int max_length,
-                     const uint32_t max_backward,
-                     int* num_matches,
-                     BackwardMatch* matches) const {
+  // matches in matches[0] to matches[*num_matches - 1]. The matches will be
+  // sorted by strictly increasing length and (non-strictly) increasing
+  // distance.
+  size_t FindAllMatches(const uint8_t* data,
+                        const size_t ring_buffer_mask,
+                        const size_t cur_ix,
+                        const size_t max_length,
+                        const size_t max_backward,
+                        BackwardMatch* matches) {
     BackwardMatch* const orig_matches = matches;
     const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
-
-
-    if (
-    for (
+    size_t best_len = 1;
+    size_t stop = cur_ix - 64;
+    if (cur_ix < 64) { stop = 0; }
+    for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {
       size_t prev_ix = i;
       const size_t backward = cur_ix - prev_ix;
       if (PREDICT_FALSE(backward > max_backward)) {
@@ -494,57 +539,57 @@ class HashLongestMatch {
           data[cur_ix_masked + 1] != data[prev_ix + 1]) {
         continue;
       }
-      const
+      const size_t len =
           FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
                                    max_length);
       if (len > best_len) {
         best_len = len;
-
-        matches = orig_matches;
-        }
-        *matches++ = BackwardMatch(static_cast<int>(backward), len);
+        *matches++ = BackwardMatch(backward, len);
       }
     }
     const uint32_t key = HashBytes(&data[cur_ix_masked]);
     const uint32_t * __restrict const bucket = &buckets_[key][0];
-    const
-    for (
-
-
+    const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
+    for (size_t i = num_[key]; i > down;) {
+      --i;
+      size_t prev_ix = bucket[i & kBlockMask];
+      const size_t backward = cur_ix - prev_ix;
       if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
         break;
       }
-      prev_ix &=
+      prev_ix &= ring_buffer_mask;
      if (cur_ix_masked + best_len > ring_buffer_mask ||
          prev_ix + best_len > ring_buffer_mask ||
          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
        continue;
      }
-      const
+      const size_t len =
          FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
                                   max_length);
      if (len > best_len) {
        best_len = len;
-        if (len > kMaxZopfliLen) {
-          matches = orig_matches;
-        }
        *matches++ = BackwardMatch(backward, len);
      }
    }
-
-
+    buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
+    ++num_[key];
+    uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
+    for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
+      dict_matches[i] = kInvalidMatch;
+    }
+    size_t minlen = std::max<size_t>(4, best_len + 1);
     if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
                                        &dict_matches[0])) {
-
-      for (
-
+      size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);
+      for (size_t l = minlen; l <= maxlen; ++l) {
+        uint32_t dict_id = dict_matches[l];
         if (dict_id < kInvalidMatch) {
           *matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
                                      dict_id & 31);
         }
       }
     }
-
+    return static_cast<size_t>(matches - orig_matches);
   }

   enum { kHashLength = 4 };
@@ -560,6 +605,10 @@ class HashLongestMatch {
     return h >> (32 - kBucketBits);
   }

+  enum { kHashMapSize = 2 << kBucketBits };
+
+  static const size_t kMaxNumMatches = 64 + (1 << kBlockBits);
+
  private:
   // Number of hash buckets.
   static const uint32_t kBucketSize = 1 << kBucketBits;
@@ -577,29 +626,281 @@ class HashLongestMatch {
   // Buckets containing kBlockSize of backward references.
   uint32_t buckets_[kBucketSize][kBlockSize];

+  // True if num_ array needs to be initialized.
+  bool need_init_;
+
   size_t num_dict_lookups_;
   size_t num_dict_matches_;
 };

+// A (forgetful) hash table where each hash bucket contains a binary tree of
+// sequences whose first 4 bytes share the same hash code.
+// Each sequence is kMaxTreeCompLength long and is identified by its starting
+// position in the input data. The binary tree is sorted by the lexicographic
+// order of the sequences, and it is also a max-heap with respect to the
+// starting positions.
+class HashToBinaryTree {
+ public:
+  HashToBinaryTree() : forest_(NULL) {
+    Reset();
+  }
+
+  ~HashToBinaryTree() {
+    delete[] forest_;
+  }
+
+  void Reset() {
+    need_init_ = true;
+  }
+
+  void Init(int lgwin, size_t position, size_t bytes, bool is_last) {
+    if (need_init_) {
+      window_mask_ = (1u << lgwin) - 1u;
+      invalid_pos_ = static_cast<uint32_t>(-window_mask_);
+      for (uint32_t i = 0; i < kBucketSize; i++) {
+        buckets_[i] = invalid_pos_;
+      }
+      size_t num_nodes = (position == 0 && is_last) ? bytes : window_mask_ + 1;
+      forest_ = new uint32_t[2 * num_nodes];
+      need_init_ = false;
+    }
+  }
+
+  // Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
+  // length of max_length and stores the position cur_ix in the hash table.
+  //
+  // Sets *num_matches to the number of matches found, and stores the found
+  // matches in matches[0] to matches[*num_matches - 1]. The matches will be
+  // sorted by strictly increasing length and (non-strictly) increasing
+  // distance.
+  size_t FindAllMatches(const uint8_t* data,
+                        const size_t ring_buffer_mask,
+                        const size_t cur_ix,
+                        const size_t max_length,
+                        const size_t max_backward,
+                        BackwardMatch* matches) {
+    BackwardMatch* const orig_matches = matches;
+    const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+    size_t best_len = 1;
+    size_t stop = cur_ix - 64;
+    if (cur_ix < 64) { stop = 0; }
+    for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {
+      size_t prev_ix = i;
+      const size_t backward = cur_ix - prev_ix;
+      if (PREDICT_FALSE(backward > max_backward)) {
+        break;
+      }
+      prev_ix &= ring_buffer_mask;
+      if (data[cur_ix_masked] != data[prev_ix] ||
+          data[cur_ix_masked + 1] != data[prev_ix + 1]) {
+        continue;
+      }
+      const size_t len =
+          FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
+                                   max_length);
+      if (len > best_len) {
+        best_len = len;
+        *matches++ = BackwardMatch(backward, len);
+      }
+    }
+    if (best_len < max_length) {
+      matches = StoreAndFindMatches(data, cur_ix, ring_buffer_mask,
+                                    max_length, &best_len, matches);
+    }
+    uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
+    for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
+      dict_matches[i] = kInvalidMatch;
+    }
+    size_t minlen = std::max<size_t>(4, best_len + 1);
+    if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
+                                       &dict_matches[0])) {
+      size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);
+      for (size_t l = minlen; l <= maxlen; ++l) {
+        uint32_t dict_id = dict_matches[l];
+        if (dict_id < kInvalidMatch) {
+          *matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
+                                     dict_id & 31);
+        }
+      }
+    }
+    return static_cast<size_t>(matches - orig_matches);
+  }
+
+  // Stores the hash of the next 4 bytes and re-roots the binary tree at the
+  // current sequence, without returning any matches.
+  // REQUIRES: cur_ix + kMaxTreeCompLength <= end-of-current-block
+  void Store(const uint8_t* data,
+             const size_t ring_buffer_mask,
+             const size_t cur_ix) {
+    size_t best_len = 0;
+    StoreAndFindMatches(data, cur_ix, ring_buffer_mask, kMaxTreeCompLength,
+                        &best_len, NULL);
+  }
+
+  void StitchToPreviousBlock(size_t num_bytes,
+                             size_t position,
+                             const uint8_t* ringbuffer,
+                             size_t ringbuffer_mask) {
+    if (num_bytes >= 3 && position >= kMaxTreeCompLength) {
+      // Store the last `kMaxTreeCompLength - 1` positions in the hasher.
+      // These could not be calculated before, since they require knowledge
+      // of both the previous and the current block.
+      const size_t i_start = position - kMaxTreeCompLength + 1;
+      const size_t i_end = std::min(position, i_start + num_bytes);
+      for (size_t i = i_start; i < i_end; ++i) {
+        // We know that i + kMaxTreeCompLength <= position + num_bytes, i.e. the
+        // end of the current block and that we have at least
+        // kMaxTreeCompLength tail in the ringbuffer.
+        Store(ringbuffer, ringbuffer_mask, i);
+      }
+    }
+  }
+
+  static const size_t kMaxNumMatches = 64 + kMaxTreeSearchDepth;
+
+ private:
+  // Stores the hash of the next 4 bytes and in a single tree-traversal, the
+  // hash bucket's binary tree is searched for matches and is re-rooted at the
+  // current position.
+  //
+  // If less than kMaxTreeCompLength data is available, the hash bucket of the
+  // current position is searched for matches, but the state of the hash table
+  // is not changed, since we can not know the final sorting order of the
+  // current (incomplete) sequence.
+  //
+  // This function must be called with increasing cur_ix positions.
+  BackwardMatch* StoreAndFindMatches(const uint8_t* const __restrict data,
+                                     const size_t cur_ix,
+                                     const size_t ring_buffer_mask,
+                                     const size_t max_length,
+                                     size_t* const __restrict best_len,
+                                     BackwardMatch* __restrict matches) {
+    const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+    const size_t max_backward = window_mask_ - 15;
+    const size_t max_comp_len = std::min(max_length, kMaxTreeCompLength);
+    const bool reroot_tree = max_length >= kMaxTreeCompLength;
+    const uint32_t key = HashBytes(&data[cur_ix_masked]);
+    size_t prev_ix = buckets_[key];
+    // The forest index of the rightmost node of the left subtree of the new
+    // root, updated as we traverse and reroot the tree of the hash bucket.
+    size_t node_left = LeftChildIndex(cur_ix);
+    // The forest index of the leftmost node of the right subtree of the new
+    // root, updated as we traverse and reroot the tree of the hash bucket.
+    size_t node_right = RightChildIndex(cur_ix);
+    // The match length of the rightmost node of the left subtree of the new
+    // root, updated as we traverse and reroot the tree of the hash bucket.
+    size_t best_len_left = 0;
+    // The match length of the leftmost node of the right subtree of the new
+    // root, updated as we traverse and reroot the tree of the hash bucket.
+    size_t best_len_right = 0;
+    if (reroot_tree) {
+      buckets_[key] = static_cast<uint32_t>(cur_ix);
+    }
+    for (size_t depth_remaining = kMaxTreeSearchDepth; ; --depth_remaining) {
+      const size_t backward = cur_ix - prev_ix;
+      const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
+      if (backward == 0 || backward > max_backward || depth_remaining == 0) {
+        if (reroot_tree) {
+          forest_[node_left] = invalid_pos_;
+          forest_[node_right] = invalid_pos_;
+        }
+        break;
+      }
+      const size_t cur_len = std::min(best_len_left, best_len_right);
+      const size_t len = cur_len +
+          FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],
+                                   &data[prev_ix_masked + cur_len],
+                                   max_length - cur_len);
+      if (len > *best_len) {
+        *best_len = len;
+        if (matches) {
+          *matches++ = BackwardMatch(backward, len);
+        }
+        if (len >= max_comp_len) {
+          if (reroot_tree) {
+            forest_[node_left] = forest_[LeftChildIndex(prev_ix)];
+            forest_[node_right] = forest_[RightChildIndex(prev_ix)];
+          }
+          break;
+        }
+      }
+      if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {
+        best_len_left = len;
+        if (reroot_tree) {
+          forest_[node_left] = static_cast<uint32_t>(prev_ix);
+        }
+        node_left = RightChildIndex(prev_ix);
+        prev_ix = forest_[node_left];
+      } else {
+        best_len_right = len;
+        if (reroot_tree) {
+          forest_[node_right] = static_cast<uint32_t>(prev_ix);
+        }
+        node_right = LeftChildIndex(prev_ix);
+        prev_ix = forest_[node_right];
+      }
+    }
+    return matches;
+  }
+
+  inline size_t LeftChildIndex(const size_t pos) {
+    return 2 * (pos & window_mask_);
+  }
+
+  inline size_t RightChildIndex(const size_t pos) {
+    return 2 * (pos & window_mask_) + 1;
+  }
+
+  static uint32_t HashBytes(const uint8_t *data) {
+    uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
+    // The higher bits contain more mixture from the multiplication,
+    // so we take our results from there.
+    return h >> (32 - kBucketBits);
+  }
+
+  static const int kBucketBits = 17;
+  static const size_t kBucketSize = 1 << kBucketBits;
+
+  // The window size minus 1
+  size_t window_mask_;
+
+  // Hash table that maps the 4-byte hashes of the sequence to the last
+  // position where this hash was found, which is the root of the binary
+  // tree of sequences that share this hash bucket.
+  uint32_t buckets_[kBucketSize];
+
+  // The union of the binary trees of each hash bucket. The root of the tree
+  // corresponding to a hash is a sequence starting at buckets_[hash] and
+  // the left and right children of a sequence starting at pos are
+  // forest_[2 * pos] and forest_[2 * pos + 1].
+  uint32_t* forest_;
+
+  // A position used to mark a non-existent sequence, i.e. a tree is empty if
+  // its root is at invalid_pos_ and a node is a leaf if both its children
+  // are at invalid_pos_.
+  uint32_t invalid_pos_;
+
+  bool need_init_;
+};
+
 struct Hashers {
   // For kBucketSweep == 1, enabling the dictionary lookup makes compression
   // a little faster (0.5% - 1%) and it compresses 0.15% better on small text
   // and html inputs.
-  typedef HashLongestMatchQuickly<16, 1, true>
-  typedef HashLongestMatchQuickly<16, 2, false>
-  typedef HashLongestMatchQuickly<16, 4, false> H3;
+  typedef HashLongestMatchQuickly<16, 1, true> H2;
+  typedef HashLongestMatchQuickly<16, 2, false> H3;
   typedef HashLongestMatchQuickly<17, 4, true> H4;
   typedef HashLongestMatch<14, 4, 4> H5;
   typedef HashLongestMatch<14, 5, 4> H6;
   typedef HashLongestMatch<15, 6, 10> H7;
   typedef HashLongestMatch<15, 7, 10> H8;
   typedef HashLongestMatch<15, 8, 16> H9;
+  typedef HashToBinaryTree H10;

-  Hashers() :
-
+  Hashers(void) : hash_h2(0), hash_h3(0), hash_h4(0), hash_h5(0),
+                  hash_h6(0), hash_h7(0), hash_h8(0), hash_h9(0), hash_h10(0) {}

-  ~Hashers() {
-    delete hash_h1;
+  ~Hashers(void) {
     delete hash_h2;
     delete hash_h3;
     delete hash_h4;
@@ -608,11 +909,11 @@ struct Hashers {
     delete hash_h7;
     delete hash_h8;
     delete hash_h9;
+    delete hash_h10;
   }

   void Init(int type) {
     switch (type) {
-      case 1: hash_h1 = new H1; break;
       case 2: hash_h2 = new H2; break;
       case 3: hash_h3 = new H3; break;
       case 4: hash_h4 = new H4; break;
@@ -621,12 +922,14 @@ struct Hashers {
       case 7: hash_h7 = new H7; break;
       case 8: hash_h8 = new H8; break;
       case 9: hash_h9 = new H9; break;
+      case 10: hash_h10 = new H10; break;
       default: break;
     }
   }

   template<typename Hasher>
   void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {
+    hasher->Init();
     for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) {
       hasher->Store(&dict[i], static_cast<uint32_t>(i));
     }
@@ -634,9 +937,8 @@ struct Hashers {

   // Custom LZ77 window.
   void PrependCustomDictionary(
-      int type, const size_t size, const uint8_t* dict) {
+      int type, int lgwin, const size_t size, const uint8_t* dict) {
     switch (type) {
-      case 1: WarmupHash(size, dict, hash_h1); break;
       case 2: WarmupHash(size, dict, hash_h2); break;
       case 3: WarmupHash(size, dict, hash_h3); break;
       case 4: WarmupHash(size, dict, hash_h4); break;
@@ -645,12 +947,17 @@ struct Hashers {
       case 7: WarmupHash(size, dict, hash_h7); break;
       case 8: WarmupHash(size, dict, hash_h8); break;
       case 9: WarmupHash(size, dict, hash_h9); break;
+      case 10:
+        hash_h10->Init(lgwin, 0, size, false);
+        for (size_t i = 0; i + kMaxTreeCompLength - 1 < size; ++i) {
+          hash_h10->Store(dict, std::numeric_limits<size_t>::max(), i);
+        }
+        break;
       default: break;
     }
   }


-  H1* hash_h1;
   H2* hash_h2;
   H3* hash_h3;
   H4* hash_h4;
@@ -659,6 +966,7 @@ struct Hashers {
   H7* hash_h7;
   H8* hash_h8;
   H9* hash_h9;
+  H10* hash_h10;
 };

 } // namespace brotli
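
Two of the smaller building blocks in the rewritten hash.h above are easy to sanity-check in isolation. The sketch below restates the new BackwardReferenceScore formula and the BackwardMatch length/length-code packing from the diff as a standalone program; the local Log2FloorNonZero is a stand-in for the helper that hash.h pulls in from fast_log.h, and the concrete inputs are illustrative only, not part of the gem:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Local stand-in for brotli's Log2FloorNonZero: floor(log2(n)) for n > 0.
static uint32_t Log2FloorNonZero(size_t n) {
  uint32_t result = 0;
  while (n >>= 1) ++result;
  return result;
}

// The updated score: 5.4 bits of assumed gain per copied byte, minus a
// penalty of 1.20 bits per doubling of the backward offset.
static double BackwardReferenceScore(size_t copy_length,
                                     size_t backward_reference_offset) {
  return 5.4 * static_cast<double>(copy_length) -
      1.20 * Log2FloorNonZero(backward_reference_offset);
}

// BackwardMatch packs length and length code into one 32-bit word:
// bits 5 and up hold the length, bits 0-4 hold the code (0 when equal).
struct BackwardMatch {
  BackwardMatch(size_t dist, size_t len, size_t len_code)
      : distance(static_cast<uint32_t>(dist)),
        length_and_code(static_cast<uint32_t>(
            (len << 5) | (len == len_code ? 0 : len_code))) {}
  size_t length() const { return length_and_code >> 5; }
  size_t length_code() const {
    size_t code = length_and_code & 31;
    return code ? code : length();
  }
  uint32_t distance;
  uint32_t length_and_code;
};

int main() {
  // A 12-byte copy from 1 KiB back scores 5.4 * 12 - 1.20 * 10 = 52.8;
  // the same copy from 64 KiB back scores 5.4 * 12 - 1.20 * 16 = 45.6,
  // so nearer matches win when lengths are equal.
  std::printf("%.1f\n", BackwardReferenceScore(12, 1024));   // 52.8
  std::printf("%.1f\n", BackwardReferenceScore(12, 65536));  // 45.6

  BackwardMatch m(1024, 12, 9);
  assert(m.length() == 12 && m.length_code() == 9);
  return 0;
}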
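
The static-dictionary lookups in both FindLongestMatch variants also switched from an exact-length test to a cutoff window: a dictionary word of length len is now usable when matchlen > 0 of its bytes match and the unmatched tail is shorter than kCutoffTransformsCount, with kCutoffTransforms[len - matchlen] selecting the transform that handles that tail. A standalone check of the index arithmetic, using the table from the diff (size_bits and dist here are made-up example values; the real per-length tables live in the vendored dictionary sources):

#include <cassert>
#include <cstddef>
#include <cstdint>

static const uint32_t kCutoffTransformsCount = 10;
static const uint8_t kCutoffTransforms[] = {
  0, 12, 27, 23, 42, 63, 56, 48, 59, 64
};

int main() {
  const size_t len = 8;       // length of the dictionary word
  const size_t matchlen = 4;  // bytes of it that actually matched
  // New acceptance test from the diff: the 4-byte shortfall is within
  // the 10 supported cutoff transforms, and something matched at all.
  assert(matchlen + kCutoffTransformsCount > len && matchlen > 0);

  // Hypothetical stand-in for kBrotliDictionarySizeBitsByLength[len].
  const size_t size_bits = 10;
  const size_t dist = 17;  // example slot of the word in its length class
  // Each transform gets its own copy of the length class in the virtual
  // word-id space, so word_id = transform * class_size + slot, and the
  // match is then addressed just beyond the sliding window as
  // backward = max_backward + word_id + 1 (as in the diff).
  const size_t transform_id = kCutoffTransforms[len - matchlen];
  const size_t word_id = transform_id * (1u << size_bits) + dist;
  assert(transform_id == 42 && word_id == 42 * 1024 + 17);
  return 0;
}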