RubyGems - brotli - Versions diffs - 0.1.3 → 0.1.4 - Mend

brotli 0.1.3 → 0.1.4

Files changed (102) hide show

checksums.yaml +4 -4
data/.gitignore +3 -0
data/.travis.yml +11 -3
data/Gemfile +2 -0
data/ext/brotli/brotli.c +279 -0
data/ext/brotli/brotli.h +2 -0
data/ext/brotli/buffer.c +95 -0
data/ext/brotli/buffer.h +19 -0
data/ext/brotli/extconf.rb +21 -81
data/lib/brotli/version.rb +1 -1
data/vendor/brotli/dec/bit_reader.c +5 -5
data/vendor/brotli/dec/bit_reader.h +15 -15
data/vendor/brotli/dec/context.h +1 -1
data/vendor/brotli/dec/decode.c +433 -348
data/vendor/brotli/dec/decode.h +74 -48
data/vendor/brotli/dec/huffman.c +5 -4
data/vendor/brotli/dec/huffman.h +4 -4
data/vendor/brotli/dec/port.h +2 -95
data/vendor/brotli/dec/prefix.h +5 -3
data/vendor/brotli/dec/state.c +15 -27
data/vendor/brotli/dec/state.h +21 -17
data/vendor/brotli/dec/transform.h +1 -1
data/vendor/brotli/enc/backward_references.c +892 -0
data/vendor/brotli/enc/backward_references.h +85 -102
data/vendor/brotli/enc/backward_references_inc.h +147 -0
data/vendor/brotli/enc/bit_cost.c +35 -0
data/vendor/brotli/enc/bit_cost.h +23 -121
data/vendor/brotli/enc/bit_cost_inc.h +127 -0
data/vendor/brotli/enc/block_encoder_inc.h +33 -0
data/vendor/brotli/enc/block_splitter.c +197 -0
data/vendor/brotli/enc/block_splitter.h +40 -50
data/vendor/brotli/enc/block_splitter_inc.h +432 -0
data/vendor/brotli/enc/brotli_bit_stream.c +1334 -0
data/vendor/brotli/enc/brotli_bit_stream.h +95 -167
data/vendor/brotli/enc/cluster.c +56 -0
data/vendor/brotli/enc/cluster.h +23 -305
data/vendor/brotli/enc/cluster_inc.h +315 -0
data/vendor/brotli/enc/command.h +83 -76
data/vendor/brotli/enc/compress_fragment.c +747 -0
data/vendor/brotli/enc/compress_fragment.h +48 -37
data/vendor/brotli/enc/compress_fragment_two_pass.c +557 -0
data/vendor/brotli/enc/compress_fragment_two_pass.h +37 -26
data/vendor/brotli/enc/compressor.cc +139 -0
data/vendor/brotli/enc/compressor.h +146 -0
data/vendor/brotli/enc/context.h +102 -96
data/vendor/brotli/enc/dictionary_hash.h +9 -5
data/vendor/brotli/enc/encode.c +1562 -0
data/vendor/brotli/enc/encode.h +211 -199
data/vendor/brotli/enc/encode_parallel.cc +161 -151
data/vendor/brotli/enc/encode_parallel.h +7 -8
data/vendor/brotli/enc/entropy_encode.c +501 -0
data/vendor/brotli/enc/entropy_encode.h +107 -89
data/vendor/brotli/enc/entropy_encode_static.h +29 -62
data/vendor/brotli/enc/fast_log.h +26 -20
data/vendor/brotli/enc/find_match_length.h +23 -20
data/vendor/brotli/enc/hash.h +614 -871
data/vendor/brotli/enc/hash_forgetful_chain_inc.h +249 -0
data/vendor/brotli/enc/hash_longest_match_inc.h +241 -0
data/vendor/brotli/enc/hash_longest_match_quickly_inc.h +230 -0
data/vendor/brotli/enc/histogram.c +95 -0
data/vendor/brotli/enc/histogram.h +49 -83
data/vendor/brotli/enc/histogram_inc.h +51 -0
data/vendor/brotli/enc/literal_cost.c +178 -0
data/vendor/brotli/enc/literal_cost.h +16 -10
data/vendor/brotli/enc/memory.c +181 -0
data/vendor/brotli/enc/memory.h +62 -0
data/vendor/brotli/enc/metablock.c +515 -0
data/vendor/brotli/enc/metablock.h +87 -57
data/vendor/brotli/enc/metablock_inc.h +183 -0
data/vendor/brotli/enc/port.h +73 -47
data/vendor/brotli/enc/prefix.h +34 -61
data/vendor/brotli/enc/quality.h +130 -0
data/vendor/brotli/enc/ringbuffer.h +137 -122
data/vendor/brotli/enc/{static_dict.cc → static_dict.c} +162 -139
data/vendor/brotli/enc/static_dict.h +23 -18
data/vendor/brotli/enc/static_dict_lut.h +11223 -12037
data/vendor/brotli/enc/streams.cc +7 -7
data/vendor/brotli/enc/streams.h +32 -32
data/vendor/brotli/enc/{utf8_util.cc → utf8_util.c} +22 -20
data/vendor/brotli/enc/utf8_util.h +16 -9
data/vendor/brotli/enc/write_bits.h +49 -43
metadata +34 -25
data/ext/brotli/brotli.cc +0 -181
data/vendor/brotli/dec/Makefile +0 -12
data/vendor/brotli/dec/dictionary.c +0 -9466
data/vendor/brotli/dec/dictionary.h +0 -38
data/vendor/brotli/dec/types.h +0 -38
data/vendor/brotli/enc/Makefile +0 -14
data/vendor/brotli/enc/backward_references.cc +0 -858
data/vendor/brotli/enc/block_splitter.cc +0 -505
data/vendor/brotli/enc/brotli_bit_stream.cc +0 -1181
data/vendor/brotli/enc/compress_fragment.cc +0 -701
data/vendor/brotli/enc/compress_fragment_two_pass.cc +0 -524
data/vendor/brotli/enc/dictionary.cc +0 -9466
data/vendor/brotli/enc/dictionary.h +0 -41
data/vendor/brotli/enc/encode.cc +0 -1180
data/vendor/brotli/enc/entropy_encode.cc +0 -480
data/vendor/brotli/enc/histogram.cc +0 -67
data/vendor/brotli/enc/literal_cost.cc +0 -165
data/vendor/brotli/enc/metablock.cc +0 -539
data/vendor/brotli/enc/transform.h +0 -248
data/vendor/brotli/enc/types.h +0 -29

data/vendor/brotli/enc/entropy_encode.cc DELETED Viewed

@@ -1,480 +0,0 @@
-/* Copyright 2010 Google Inc. All Rights Reserved.
-   Distributed under MIT license.
-   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
-*/
-// Entropy encoding (Huffman) utilities.
-#include "./entropy_encode.h"
-#include <algorithm>
-#include <limits>
-#include <cstdlib>
-#include "./histogram.h"
-#include "./port.h"
-#include "./types.h"
-namespace brotli {
-void SetDepth(const HuffmanTree &p,
-              HuffmanTree *pool,
-              uint8_t *depth,
-              uint8_t level) {
-  if (p.index_left_ >= 0) {
-    ++level;
-    SetDepth(pool[p.index_left_], pool, depth, level);
-    SetDepth(pool[p.index_right_or_value_], pool, depth, level);
-  } else {
-    depth[p.index_right_or_value_] = level;
-  }
-}
-// Sort the root nodes, least popular first.
-static inline bool SortHuffmanTree(const HuffmanTree& v0,
-                                   const HuffmanTree& v1) {
-  if (v0.total_count_ != v1.total_count_) {
-    return v0.total_count_ < v1.total_count_;
-  }
-  return v0.index_right_or_value_ > v1.index_right_or_value_;
-}
-// This function will create a Huffman tree.
-//
-// The catch here is that the tree cannot be arbitrarily deep.
-// Brotli specifies a maximum depth of 15 bits for "code trees"
-// and 7 bits for "code length code trees."
-//
-// count_limit is the value that is to be faked as the minimum value
-// and this minimum value is raised until the tree matches the
-// maximum length requirement.
-//
-// This algorithm is not of excellent performance for very long data blocks,
-// especially when population counts are longer than 2**tree_limit, but
-// we are not planning to use this with extremely long blocks.
-//
-// See http://en.wikipedia.org/wiki/Huffman_coding
-void CreateHuffmanTree(const uint32_t *data,
-                       const size_t length,
-                       const int tree_limit,
-                       HuffmanTree* tree,
-                       uint8_t *depth) {
-  // For block sizes below 64 kB, we never need to do a second iteration
-  // of this loop. Probably all of our block sizes will be smaller than
-  // that, so this loop is mostly of academic interest. If we actually
-  // would need this, we would be better off with the Katajainen algorithm.
-  for (uint32_t count_limit = 1; ; count_limit *= 2) {
-    size_t n = 0;
-    for (size_t i = length; i != 0;) {
-      --i;
-      if (data[i]) {
-        const uint32_t count = std::max(data[i], count_limit);
-        tree[n++] = HuffmanTree(count, -1, static_cast<int16_t>(i));
-      }
-    }
-    if (n == 1) {
-      depth[tree[0].index_right_or_value_] = 1;      // Only one element.
-      break;
-    }
-    std::sort(tree, tree + n, SortHuffmanTree);
-    // The nodes are:
-    // [0, n): the sorted leaf nodes that we start with.
-    // [n]: we add a sentinel here.
-    // [n + 1, 2n): new parent nodes are added here, starting from
-    //              (n+1). These are naturally in ascending order.
-    // [2n]: we add a sentinel at the end as well.
-    // There will be (2n+1) elements at the end.
-    const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
-    tree[n] = sentinel;
-    tree[n + 1] = sentinel;
-    size_t i = 0;      // Points to the next leaf node.
-    size_t j = n + 1;  // Points to the next non-leaf node.
-    for (size_t k = n - 1; k != 0; --k) {
-      size_t left, right;
-      if (tree[i].total_count_ <= tree[j].total_count_) {
-        left = i;
-        ++i;
-      } else {
-        left = j;
-        ++j;
-      }
-      if (tree[i].total_count_ <= tree[j].total_count_) {
-        right = i;
-        ++i;
-      } else {
-        right = j;
-        ++j;
-      }
-      // The sentinel node becomes the parent node.
-      size_t j_end = 2 * n - k;
-      tree[j_end].total_count_ =
-          tree[left].total_count_ + tree[right].total_count_;
-      tree[j_end].index_left_ = static_cast<int16_t>(left);
-      tree[j_end].index_right_or_value_ = static_cast<int16_t>(right);
-      // Add back the last sentinel node.
-      tree[j_end + 1] = sentinel;
-    }
-    SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
-    // We need to pack the Huffman tree in tree_limit bits.
-    // If this was not successful, add fake entities to the lowest values
-    // and retry.
-    if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
-      break;
-    }
-  }
-}
-static void Reverse(uint8_t* v, size_t start, size_t end) {
-  --end;
-  while (start < end) {
-    uint8_t tmp = v[start];
-    v[start] = v[end];
-    v[end] = tmp;
-    ++start;
-    --end;
-  }
-}
-static void WriteHuffmanTreeRepetitions(
-    const uint8_t previous_value,
-    const uint8_t value,
-    size_t repetitions,
-    size_t* tree_size,
-    uint8_t* tree,
-    uint8_t* extra_bits_data) {
-  assert(repetitions > 0);
-  if (previous_value != value) {
-    tree[*tree_size] = value;
-    extra_bits_data[*tree_size] = 0;
-    ++(*tree_size);
-    --repetitions;
-  }
-  if (repetitions == 7) {
-    tree[*tree_size] = value;
-    extra_bits_data[*tree_size] = 0;
-    ++(*tree_size);
-    --repetitions;
-  }
-  if (repetitions < 3) {
-    for (size_t i = 0; i < repetitions; ++i) {
-      tree[*tree_size] = value;
-      extra_bits_data[*tree_size] = 0;
-      ++(*tree_size);
-    }
-  } else {
-    repetitions -= 3;
-    size_t start = *tree_size;
-    while (true) {
-      tree[*tree_size] = 16;
-      extra_bits_data[*tree_size] = repetitions & 0x3;
-      ++(*tree_size);
-      repetitions >>= 2;
-      if (repetitions == 0) {
-        break;
-      }
-      --repetitions;
-    }
-    Reverse(tree, start, *tree_size);
-    Reverse(extra_bits_data, start, *tree_size);
-  }
-}
-static void WriteHuffmanTreeRepetitionsZeros(
-    size_t repetitions,
-    size_t* tree_size,
-    uint8_t* tree,
-    uint8_t* extra_bits_data) {
-  if (repetitions == 11) {
-    tree[*tree_size] = 0;
-    extra_bits_data[*tree_size] = 0;
-    ++(*tree_size);
-    --repetitions;
-  }
-  if (repetitions < 3) {
-    for (size_t i = 0; i < repetitions; ++i) {
-      tree[*tree_size] = 0;
-      extra_bits_data[*tree_size] = 0;
-      ++(*tree_size);
-    }
-  } else {
-    repetitions -= 3;
-    size_t start = *tree_size;
-    while (true) {
-      tree[*tree_size] = 17;
-      extra_bits_data[*tree_size] = repetitions & 0x7;
-      ++(*tree_size);
-      repetitions >>= 3;
-      if (repetitions == 0) {
-        break;
-      }
-      --repetitions;
-    }
-    Reverse(tree, start, *tree_size);
-    Reverse(extra_bits_data, start, *tree_size);
-  }
-}
-void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
-                                 uint8_t* good_for_rle) {
-  size_t nonzero_count = 0;
-  size_t stride;
-  size_t limit;
-  size_t sum;
-  const size_t streak_limit = 1240;
-  // Let's make the Huffman code more compatible with rle encoding.
-  size_t i;
-  for (i = 0; i < length; i++) {
-    if (counts[i]) {
-      ++nonzero_count;
-    }
-  }
-  if (nonzero_count < 16) {
-    return;
-  }
-  while (length != 0 && counts[length - 1] == 0) {
-    --length;
-  }
-  if (length == 0) {
-    return;  // All zeros.
-  }
-  // Now counts[0..length - 1] does not have trailing zeros.
-  {
-    size_t nonzeros = 0;
-    uint32_t smallest_nonzero = 1 << 30;
-    for (i = 0; i < length; ++i) {
-      if (counts[i] != 0) {
-        ++nonzeros;
-        if (smallest_nonzero > counts[i]) {
-          smallest_nonzero = counts[i];
-        }
-      }
-    }
-    if (nonzeros < 5) {
-      // Small histogram will model it well.
-      return;
-    }
-    size_t zeros = length - nonzeros;
-    if (smallest_nonzero < 4) {
-      if (zeros < 6) {
-        for (i = 1; i < length - 1; ++i) {
-          if (counts[i - 1] != 0 && counts[i] == 0 && counts[i + 1] != 0) {
-            counts[i] = 1;
-          }
-        }
-      }
-    }
-    if (nonzeros < 28) {
-      return;
-    }
-  }
-  // 2) Let's mark all population counts that already can be encoded
-  // with an rle code.
-  memset(good_for_rle, 0, length);
-  {
-    // Let's not spoil any of the existing good rle codes.
-    // Mark any seq of 0's that is longer as 5 as a good_for_rle.
-    // Mark any seq of non-0's that is longer as 7 as a good_for_rle.
-    uint32_t symbol = counts[0];
-    size_t step = 0;
-    for (i = 0; i <= length; ++i) {
-      if (i == length || counts[i] != symbol) {
-        if ((symbol == 0 && step >= 5) ||
-            (symbol != 0 && step >= 7)) {
-          size_t k;
-          for (k = 0; k < step; ++k) {
-            good_for_rle[i - k - 1] = 1;
-          }
-        }
-        step = 1;
-        if (i != length) {
-          symbol = counts[i];
-        }
-      } else {
-        ++step;
-      }
-    }
-  }
-  // 3) Let's replace those population counts that lead to more rle codes.
-  // Math here is in 24.8 fixed point representation.
-  stride = 0;
-  limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
-  sum = 0;
-  for (i = 0; i <= length; ++i) {
-    if (i == length || good_for_rle[i] ||
-        (i != 0 && good_for_rle[i - 1]) ||
-        (256 * counts[i] - limit + streak_limit) >= 2 * streak_limit) {
-      if (stride >= 4 || (stride >= 3 && sum == 0)) {
-        size_t k;
-        // The stride must end, collapse what we have, if we have enough (4).
-        size_t count = (sum + stride / 2) / stride;
-        if (count == 0) {
-          count = 1;
-        }
-        if (sum == 0) {
-          // Don't make an all zeros stride to be upgraded to ones.
-          count = 0;
-        }
-        for (k = 0; k < stride; ++k) {
-          // We don't want to change value at counts[i],
-          // that is already belonging to the next stride. Thus - 1.
-          counts[i - k - 1] = static_cast<uint32_t>(count);
-        }
-      }
-      stride = 0;
-      sum = 0;
-      if (i < length - 2) {
-        // All interesting strides have a count of at least 4,
-        // at least when non-zeros.
-        limit = 256 * (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 420;
-      } else if (i < length) {
-        limit = 256 * counts[i];
-      } else {
-        limit = 0;
-      }
-    }
-    ++stride;
-    if (i != length) {
-      sum += counts[i];
-      if (stride >= 4) {
-        limit = (256 * sum + stride / 2) / stride;
-      }
-      if (stride == 4) {
-        limit += 120;
-      }
-    }
-  }
-}
-static void DecideOverRleUse(const uint8_t* depth, const size_t length,
-                             bool *use_rle_for_non_zero,
-                             bool *use_rle_for_zero) {
-  size_t total_reps_zero = 0;
-  size_t total_reps_non_zero = 0;
-  size_t count_reps_zero = 1;
-  size_t count_reps_non_zero = 1;
-  for (size_t i = 0; i < length;) {
-    const uint8_t value = depth[i];
-    size_t reps = 1;
-    for (size_t k = i + 1; k < length && depth[k] == value; ++k) {
-      ++reps;
-    }
-    if (reps >= 3 && value == 0) {
-      total_reps_zero += reps;
-      ++count_reps_zero;
-    }
-    if (reps >= 4 && value != 0) {
-      total_reps_non_zero += reps;
-      ++count_reps_non_zero;
-    }
-    i += reps;
-  }
-  *use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
-  *use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
-}
-void WriteHuffmanTree(const uint8_t* depth,
-                      size_t length,
-                      size_t* tree_size,
-                      uint8_t* tree,
-                      uint8_t* extra_bits_data) {
-  uint8_t previous_value = 8;
-  // Throw away trailing zeros.
-  size_t new_length = length;
-  for (size_t i = 0; i < length; ++i) {
-    if (depth[length - i - 1] == 0) {
-      --new_length;
-    } else {
-      break;
-    }
-  }
-  // First gather statistics on if it is a good idea to do rle.
-  bool use_rle_for_non_zero = false;
-  bool use_rle_for_zero = false;
-  if (length > 50) {
-    // Find rle coding for longer codes.
-    // Shorter codes seem not to benefit from rle.
-    DecideOverRleUse(depth, new_length,
-                     &use_rle_for_non_zero, &use_rle_for_zero);
-  }
-  // Actual rle coding.
-  for (size_t i = 0; i < new_length;) {
-    const uint8_t value = depth[i];
-    size_t reps = 1;
-    if ((value != 0 && use_rle_for_non_zero) ||
-        (value == 0 && use_rle_for_zero)) {
-      for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) {
-        ++reps;
-      }
-    }
-    if (value == 0) {
-      WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
-    } else {
-      WriteHuffmanTreeRepetitions(previous_value,
-                                  value, reps, tree_size,
-                                  tree, extra_bits_data);
-      previous_value = value;
-    }
-    i += reps;
-  }
-}
-namespace {
-uint16_t ReverseBits(int num_bits, uint16_t bits) {
-  static const size_t kLut[16] = {  // Pre-reversed 4-bit values.
-    0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
-    0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
-  };
-  size_t retval = kLut[bits & 0xf];
-  for (int i = 4; i < num_bits; i += 4) {
-    retval <<= 4;
-    bits = static_cast<uint16_t>(bits >> 4);
-    retval |= kLut[bits & 0xf];
-  }
-  retval >>= (-num_bits & 0x3);
-  return static_cast<uint16_t>(retval);
-}
-}  // namespace
-void ConvertBitDepthsToSymbols(const uint8_t *depth,
-                               size_t len,
-                               uint16_t *bits) {
-  // In Brotli, all bit depths are [1..15]
-  // 0 bit depth means that the symbol does not exist.
-  const int kMaxBits = 16;  // 0..15 are values for bits
-  uint16_t bl_count[kMaxBits] = { 0 };
-  {
-    for (size_t i = 0; i < len; ++i) {
-      ++bl_count[depth[i]];
-    }
-    bl_count[0] = 0;
-  }
-  uint16_t next_code[kMaxBits];
-  next_code[0] = 0;
-  {
-    int code = 0;
-    for (int bits = 1; bits < kMaxBits; ++bits) {
-      code = (code + bl_count[bits - 1]) << 1;
-      next_code[bits] = static_cast<uint16_t>(code);
-    }
-  }
-  for (size_t i = 0; i < len; ++i) {
-    if (depth[i]) {
-      bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
-    }
-  }
-}
-}  // namespace brotli

data/vendor/brotli/enc/histogram.cc DELETED Viewed

@@ -1,67 +0,0 @@
-/* Copyright 2013 Google Inc. All Rights Reserved.
-   Distributed under MIT license.
-   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
-*/
-// Build per-context histograms of literals, commands and distance codes.
-#include "./histogram.h"
-#include <cmath>
-#include "./block_splitter.h"
-#include "./command.h"
-#include "./context.h"
-#include "./prefix.h"
-namespace brotli {
-void BuildHistograms(
-    const Command* cmds,
-    const size_t num_commands,
-    const BlockSplit& literal_split,
-    const BlockSplit& insert_and_copy_split,
-    const BlockSplit& dist_split,
-    const uint8_t* ringbuffer,
-    size_t start_pos,
-    size_t mask,
-    uint8_t prev_byte,
-    uint8_t prev_byte2,
-    const std::vector<ContextType>& context_modes,
-    std::vector<HistogramLiteral>* literal_histograms,
-    std::vector<HistogramCommand>* insert_and_copy_histograms,
-    std::vector<HistogramDistance>* copy_dist_histograms) {
-  size_t pos = start_pos;
-  BlockSplitIterator literal_it(literal_split);
-  BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
-  BlockSplitIterator dist_it(dist_split);
-  for (size_t i = 0; i < num_commands; ++i) {
-    const Command &cmd = cmds[i];
-    insert_and_copy_it.Next();
-    (*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
-        cmd.cmd_prefix_);
-    for (size_t j = cmd.insert_len_; j != 0; --j) {
-      literal_it.Next();
-      size_t context = (literal_it.type_ << kLiteralContextBits) +
-          Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
-      (*literal_histograms)[context].Add(ringbuffer[pos & mask]);
-      prev_byte2 = prev_byte;
-      prev_byte = ringbuffer[pos & mask];
-      ++pos;
-    }
-    pos += cmd.copy_len();
-    if (cmd.copy_len()) {
-      prev_byte2 = ringbuffer[(pos - 2) & mask];
-      prev_byte = ringbuffer[(pos - 1) & mask];
-      if (cmd.cmd_prefix_ >= 128) {
-        dist_it.Next();
-        size_t context = (dist_it.type_ << kDistanceContextBits) +
-            cmd.DistanceContext();
-        (*copy_dist_histograms)[context].Add(cmd.dist_prefix_);
-      }
-    }
-  }
-}
-}  // namespace brotli

data/vendor/brotli/enc/literal_cost.cc DELETED Viewed

@@ -1,165 +0,0 @@
-/* Copyright 2013 Google Inc. All Rights Reserved.
-   Distributed under MIT license.
-   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
-*/
-// Literal cost model to allow backward reference replacement to be efficient.
-#include "./literal_cost.h"
-#include <math.h>
-#include <algorithm>
-#include "./fast_log.h"
-#include "./types.h"
-#include "./utf8_util.h"
-namespace brotli {
-static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
-  if (c < 128) {
-    return 0;  // Next one is the 'Byte 1' again.
-  } else if (c >= 192) {  // Next one is the 'Byte 2' of utf-8 encoding.
-    return std::min<size_t>(1, clamp);
-  } else {
-    // Let's decide over the last byte if this ends the sequence.
-    if (last < 0xe0) {
-      return 0;  // Completed two or three byte coding.
-    } else {  // Next one is the 'Byte 3' of utf-8 encoding.
-      return std::min<size_t>(2, clamp);
-    }
-  }
-}
-static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
-                                        const uint8_t *data) {
-  size_t counts[3] = { 0 };
-  size_t max_utf8 = 1;  // should be 2, but 1 compresses better.
-  size_t last_c = 0;
-  size_t utf8_pos = 0;
-  for (size_t i = 0; i < len; ++i) {
-    size_t c = data[(pos + i) & mask];
-    utf8_pos = UTF8Position(last_c, c, 2);
-    ++counts[utf8_pos];
-    last_c = c;
-  }
-  if (counts[2] < 500) {
-    max_utf8 = 1;
-  }
-  if (counts[1] + counts[2] < 25) {
-    max_utf8 = 0;
-  }
-  return max_utf8;
-}
-static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
-                                            const uint8_t *data, float *cost) {
-  // max_utf8 is 0 (normal ascii single byte modeling),
-  // 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling).
-  const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
-  size_t histogram[3][256] = { { 0 } };
-  size_t window_half = 495;
-  size_t in_window = std::min(window_half, len);
-  size_t in_window_utf8[3] = { 0 };
-  // Bootstrap histograms.
-  size_t last_c = 0;
-  size_t utf8_pos = 0;
-  for (size_t i = 0; i < in_window; ++i) {
-    size_t c = data[(pos + i) & mask];
-    ++histogram[utf8_pos][c];
-    ++in_window_utf8[utf8_pos];
-    utf8_pos = UTF8Position(last_c, c, max_utf8);
-    last_c = c;
-  }
-  // Compute bit costs with sliding window.
-  for (size_t i = 0; i < len; ++i) {
-    if (i >= window_half) {
-      // Remove a byte in the past.
-      size_t c = i < window_half + 1 ?
-          0 : data[(pos + i - window_half - 1) & mask];
-      size_t last_c = i < window_half + 2 ?
-          0 : data[(pos + i - window_half - 2) & mask];
-      size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
-      --histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
-      --in_window_utf8[utf8_pos2];
-    }
-    if (i + window_half < len) {
-      // Add a byte in the future.
-      size_t c = data[(pos + i + window_half - 1) & mask];
-      size_t last_c = data[(pos + i + window_half - 2) & mask];
-      size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
-      ++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
-      ++in_window_utf8[utf8_pos2];
-    }
-    size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
-    size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
-    size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
-    size_t masked_pos = (pos + i) & mask;
-    size_t histo = histogram[utf8_pos][data[masked_pos]];
-    if (histo == 0) {
-      histo = 1;
-    }
-    double lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
-    lit_cost += 0.02905;
-    if (lit_cost < 1.0) {
-      lit_cost *= 0.5;
-      lit_cost += 0.5;
-    }
-    // Make the first bytes more expensive -- seems to help, not sure why.
-    // Perhaps because the entropy source is changing its properties
-    // rapidly in the beginning of the file, perhaps because the beginning
-    // of the data is a statistical "anomaly".
-    if (i < 2000) {
-      lit_cost += 0.7 - (static_cast<double>(2000 - i) / 2000.0 * 0.35);
-    }
-    cost[i] = static_cast<float>(lit_cost);
-  }
-}
-void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
-                                 const uint8_t *data, float *cost) {
-  if (IsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
-    EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
-    return;
-  }
-  size_t histogram[256] = { 0 };
-  size_t window_half = 2000;
-  size_t in_window = std::min(window_half, len);
-  // Bootstrap histogram.
-  for (size_t i = 0; i < in_window; ++i) {
-    ++histogram[data[(pos + i) & mask]];
-  }
-  // Compute bit costs with sliding window.
-  for (size_t i = 0; i < len; ++i) {
-    if (i >= window_half) {
-      // Remove a byte in the past.
-      --histogram[data[(pos + i - window_half) & mask]];
-      --in_window;
-    }
-    if (i + window_half < len) {
-      // Add a byte in the future.
-      ++histogram[data[(pos + i + window_half) & mask]];
-      ++in_window;
-    }
-    size_t histo = histogram[data[(pos + i) & mask]];
-    if (histo == 0) {
-      histo = 1;
-    }
-    double lit_cost = FastLog2(in_window) - FastLog2(histo);
-    lit_cost += 0.029;
-    if (lit_cost < 1.0) {
-      lit_cost *= 0.5;
-      lit_cost += 0.5;
-    }
-    cost[i] = static_cast<float>(lit_cost);
-  }
-}
-}  // namespace brotli