brotli 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/ext/brotli/brotli.cc +114 -24
  3. data/ext/brotli/brotli.h +0 -1
  4. data/ext/brotli/extconf.rb +30 -23
  5. data/lib/brotli/version.rb +1 -1
  6. data/vendor/brotli/LICENSE +1 -1
  7. data/vendor/brotli/dec/Makefile +1 -1
  8. data/vendor/brotli/dec/bit_reader.c +3 -3
  9. data/vendor/brotli/dec/bit_reader.h +25 -27
  10. data/vendor/brotli/dec/context.h +4 -4
  11. data/vendor/brotli/dec/decode.c +410 -486
  12. data/vendor/brotli/dec/decode.h +101 -105
  13. data/vendor/brotli/dec/dictionary.c +1 -1
  14. data/vendor/brotli/dec/dictionary.h +7 -8
  15. data/vendor/brotli/dec/huffman.c +103 -105
  16. data/vendor/brotli/dec/huffman.h +18 -18
  17. data/vendor/brotli/dec/port.h +52 -40
  18. data/vendor/brotli/dec/prefix.h +2 -0
  19. data/vendor/brotli/dec/state.c +13 -19
  20. data/vendor/brotli/dec/state.h +25 -39
  21. data/vendor/brotli/dec/transform.h +38 -44
  22. data/vendor/brotli/dec/types.h +2 -2
  23. data/vendor/brotli/enc/Makefile +1 -1
  24. data/vendor/brotli/enc/backward_references.cc +455 -359
  25. data/vendor/brotli/enc/backward_references.h +79 -3
  26. data/vendor/brotli/enc/bit_cost.h +54 -32
  27. data/vendor/brotli/enc/block_splitter.cc +285 -193
  28. data/vendor/brotli/enc/block_splitter.h +4 -12
  29. data/vendor/brotli/enc/brotli_bit_stream.cc +623 -324
  30. data/vendor/brotli/enc/brotli_bit_stream.h +76 -37
  31. data/vendor/brotli/enc/cluster.h +161 -120
  32. data/vendor/brotli/enc/command.h +60 -37
  33. data/vendor/brotli/enc/compress_fragment.cc +701 -0
  34. data/vendor/brotli/enc/compress_fragment.h +47 -0
  35. data/vendor/brotli/enc/compress_fragment_two_pass.cc +524 -0
  36. data/vendor/brotli/enc/compress_fragment_two_pass.h +40 -0
  37. data/vendor/brotli/enc/compressor.h +15 -0
  38. data/vendor/brotli/enc/context.h +1 -1
  39. data/vendor/brotli/enc/dictionary.h +2 -2
  40. data/vendor/brotli/enc/encode.cc +819 -286
  41. data/vendor/brotli/enc/encode.h +38 -15
  42. data/vendor/brotli/enc/encode_parallel.cc +40 -42
  43. data/vendor/brotli/enc/entropy_encode.cc +144 -147
  44. data/vendor/brotli/enc/entropy_encode.h +32 -8
  45. data/vendor/brotli/enc/entropy_encode_static.h +572 -0
  46. data/vendor/brotli/enc/fast_log.h +7 -40
  47. data/vendor/brotli/enc/find_match_length.h +9 -9
  48. data/vendor/brotli/enc/hash.h +462 -154
  49. data/vendor/brotli/enc/histogram.cc +6 -6
  50. data/vendor/brotli/enc/histogram.h +13 -13
  51. data/vendor/brotli/enc/literal_cost.cc +45 -45
  52. data/vendor/brotli/enc/metablock.cc +92 -89
  53. data/vendor/brotli/enc/metablock.h +12 -12
  54. data/vendor/brotli/enc/port.h +7 -16
  55. data/vendor/brotli/enc/prefix.h +23 -22
  56. data/vendor/brotli/enc/ringbuffer.h +75 -29
  57. data/vendor/brotli/enc/static_dict.cc +56 -48
  58. data/vendor/brotli/enc/static_dict.h +5 -5
  59. data/vendor/brotli/enc/streams.cc +1 -1
  60. data/vendor/brotli/enc/streams.h +5 -5
  61. data/vendor/brotli/enc/transform.h +40 -35
  62. data/vendor/brotli/enc/types.h +2 -0
  63. data/vendor/brotli/enc/utf8_util.cc +3 -2
  64. data/vendor/brotli/enc/write_bits.h +6 -6
  65. metadata +9 -5
  66. data/vendor/brotli/dec/streams.c +0 -102
  67. data/vendor/brotli/dec/streams.h +0 -95
@@ -23,17 +23,93 @@ namespace brotli {
23
23
  // by this call.
24
24
  void CreateBackwardReferences(size_t num_bytes,
25
25
  size_t position,
26
+ bool is_last,
26
27
  const uint8_t* ringbuffer,
27
28
  size_t ringbuffer_mask,
28
- const size_t max_backward_limit,
29
29
  const int quality,
30
+ const int lgwin,
30
31
  Hashers* hashers,
31
32
  int hash_type,
32
33
  int* dist_cache,
33
- int* last_insert_len,
34
+ size_t* last_insert_len,
34
35
  Command* commands,
35
36
  size_t* num_commands,
36
- int* num_literals);
37
+ size_t* num_literals);
38
+
39
+ static const float kInfinity = std::numeric_limits<float>::infinity();
40
+
41
+ struct ZopfliNode {
42
+ ZopfliNode(void) : length(1),
43
+ distance(0),
44
+ insert_length(0),
45
+ cost(kInfinity) {}
46
+
47
+ inline uint32_t copy_length() const {
48
+ return length & 0xffffff;
49
+ }
50
+
51
+ inline uint32_t length_code() const {
52
+ const uint32_t modifier = length >> 24;
53
+ return copy_length() + 9u - modifier;
54
+ }
55
+
56
+ inline uint32_t copy_distance() const {
57
+ return distance & 0x1ffffff;
58
+ }
59
+
60
+ inline uint32_t distance_code() const {
61
+ const uint32_t short_code = distance >> 25;
62
+ return short_code == 0 ? copy_distance() + 15 : short_code - 1;
63
+ }
64
+
65
+ inline uint32_t command_length() const {
66
+ return copy_length() + insert_length;
67
+ }
68
+
69
+ // best length to get up to this byte (not including this byte itself)
70
+ // highest 8 bit is used to reconstruct the length code
71
+ uint32_t length;
72
+ // distance associated with the length
73
+ // highest 7 bit contains distance short code + 1 (or zero if no short code)
74
+ uint32_t distance;
75
+ // number of literal inserts before this copy
76
+ uint32_t insert_length;
77
+ // smallest cost to get to this byte from the beginning, as found so far
78
+ float cost;
79
+ };
80
+
81
+ // Computes the shortest path of commands from position to at most
82
+ // position + num_bytes.
83
+ //
84
+ // On return, path->size() is the number of commands found and path[i] is the
85
+ // length of the ith command (copy length plus insert length).
86
+ // Note that the sum of the lengths of all commands can be less than num_bytes.
87
+ //
88
+ // On return, the nodes[0..num_bytes] array will have the following
89
+ // "ZopfliNode array invariant":
90
+ // For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
91
+ // (1) nodes[i].copy_length() >= 2
92
+ // (2) nodes[i].command_length() <= i and
93
+ // (3) nodes[i - nodes[i].command_length()].cost < kInfinity
94
+ void ZopfliComputeShortestPath(size_t num_bytes,
95
+ size_t position,
96
+ const uint8_t* ringbuffer,
97
+ size_t ringbuffer_mask,
98
+ const size_t max_backward_limit,
99
+ const int* dist_cache,
100
+ Hashers::H10* hasher,
101
+ ZopfliNode* nodes,
102
+ std::vector<uint32_t>* path);
103
+
104
+ void ZopfliCreateCommands(const size_t num_bytes,
105
+ const size_t block_start,
106
+ const size_t max_backward_limit,
107
+ const std::vector<uint32_t>& path,
108
+ const ZopfliNode* nodes,
109
+ int* dist_cache,
110
+ size_t* last_insert_len,
111
+ Command* commands,
112
+ size_t* num_literals);
37
113
 
38
114
  } // namespace brotli
39
115
 
@@ -9,89 +9,111 @@
9
9
  #ifndef BROTLI_ENC_BIT_COST_H_
10
10
  #define BROTLI_ENC_BIT_COST_H_
11
11
 
12
-
13
-
14
12
  #include "./entropy_encode.h"
15
13
  #include "./fast_log.h"
16
14
  #include "./types.h"
17
15
 
18
16
  namespace brotli {
19
17
 
20
- static inline double ShannonEntropy(const int *population, int size,
21
- int *total) {
22
- int sum = 0;
18
+ static inline double ShannonEntropy(const uint32_t *population, size_t size,
19
+ size_t *total) {
20
+ size_t sum = 0;
23
21
  double retval = 0;
24
- const int *population_end = population + size;
25
- int p;
22
+ const uint32_t *population_end = population + size;
23
+ size_t p;
26
24
  if (size & 1) {
27
25
  goto odd_number_of_elements_left;
28
26
  }
29
27
  while (population < population_end) {
30
28
  p = *population++;
31
29
  sum += p;
32
- retval -= p * FastLog2(p);
30
+ retval -= static_cast<double>(p) * FastLog2(p);
33
31
  odd_number_of_elements_left:
34
32
  p = *population++;
35
33
  sum += p;
36
- retval -= p * FastLog2(p);
34
+ retval -= static_cast<double>(p) * FastLog2(p);
37
35
  }
38
- if (sum) retval += sum * FastLog2(sum);
36
+ if (sum) retval += static_cast<double>(sum) * FastLog2(sum);
39
37
  *total = sum;
40
38
  return retval;
41
39
  }
42
40
 
43
- static inline double BitsEntropy(const int *population, int size) {
44
- int sum;
41
+ static inline double BitsEntropy(const uint32_t *population, size_t size) {
42
+ size_t sum;
45
43
  double retval = ShannonEntropy(population, size, &sum);
46
44
  if (retval < sum) {
47
45
  // At least one bit per literal is needed.
48
- retval = sum;
46
+ retval = static_cast<double>(sum);
49
47
  }
50
48
  return retval;
51
49
  }
52
50
 
53
-
54
51
  template<int kSize>
55
52
  double PopulationCost(const Histogram<kSize>& histogram) {
53
+ static const double kOneSymbolHistogramCost = 12;
54
+ static const double kTwoSymbolHistogramCost = 20;
55
+ static const double kThreeSymbolHistogramCost = 28;
56
+ static const double kFourSymbolHistogramCost = 37;
56
57
  if (histogram.total_count_ == 0) {
57
- return 12;
58
+ return kOneSymbolHistogramCost;
58
59
  }
59
60
  int count = 0;
61
+ int s[5];
60
62
  for (int i = 0; i < kSize; ++i) {
61
63
  if (histogram.data_[i] > 0) {
64
+ s[count] = i;
62
65
  ++count;
66
+ if (count > 4) break;
63
67
  }
64
68
  }
65
69
  if (count == 1) {
66
- return 12;
70
+ return kOneSymbolHistogramCost;
67
71
  }
68
72
  if (count == 2) {
69
- return 20 + histogram.total_count_;
73
+ return (kTwoSymbolHistogramCost +
74
+ static_cast<double>(histogram.total_count_));
70
75
  }
71
- double bits = 0;
72
- uint8_t depth_array[kSize] = { 0 };
73
- if (count <= 4) {
74
- // For very low symbol count we build the Huffman tree.
75
- CreateHuffmanTree(&histogram.data_[0], kSize, 15, depth_array);
76
- for (int i = 0; i < kSize; ++i) {
77
- bits += histogram.data_[i] * depth_array[i];
76
+ if (count == 3) {
77
+ const uint32_t histo0 = histogram.data_[s[0]];
78
+ const uint32_t histo1 = histogram.data_[s[1]];
79
+ const uint32_t histo2 = histogram.data_[s[2]];
80
+ const uint32_t histomax = std::max(histo0, std::max(histo1, histo2));
81
+ return (kThreeSymbolHistogramCost +
82
+ 2 * (histo0 + histo1 + histo2) - histomax);
83
+ }
84
+ if (count == 4) {
85
+ uint32_t histo[4];
86
+ for (int i = 0; i < 4; ++i) {
87
+ histo[i] = histogram.data_[s[i]];
78
88
  }
79
- return count == 3 ? bits + 28 : bits + 37;
89
+ // Sort
90
+ for (int i = 0; i < 4; ++i) {
91
+ for (int j = i + 1; j < 4; ++j) {
92
+ if (histo[j] > histo[i]) {
93
+ std::swap(histo[j], histo[i]);
94
+ }
95
+ }
96
+ }
97
+ const uint32_t h23 = histo[2] + histo[3];
98
+ const uint32_t histomax = std::max(h23, histo[0]);
99
+ return (kFourSymbolHistogramCost +
100
+ 3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
80
101
  }
81
102
 
82
103
  // In this loop we compute the entropy of the histogram and simultaneously
83
104
  // build a simplified histogram of the code length codes where we use the
84
105
  // zero repeat code 17, but we don't use the non-zero repeat code 16.
85
- int max_depth = 1;
86
- int depth_histo[kCodeLengthCodes] = { 0 };
106
+ double bits = 0;
107
+ size_t max_depth = 1;
108
+ uint32_t depth_histo[kCodeLengthCodes] = { 0 };
87
109
  const double log2total = FastLog2(histogram.total_count_);
88
- for (int i = 0; i < kSize;) {
110
+ for (size_t i = 0; i < kSize;) {
89
111
  if (histogram.data_[i] > 0) {
90
112
  // Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
91
113
  // = log2(total_count) - log2(count(symbol))
92
114
  double log2p = log2total - FastLog2(histogram.data_[i]);
93
115
  // Approximate the bit depth by round(-log2(P(symbol)))
94
- int depth = static_cast<int>(log2p + 0.5);
116
+ size_t depth = static_cast<size_t>(log2p + 0.5);
95
117
  bits += histogram.data_[i] * log2p;
96
118
  if (depth > 15) {
97
119
  depth = 15;
@@ -104,8 +126,8 @@ double PopulationCost(const Histogram<kSize>& histogram) {
104
126
  } else {
105
127
  // Compute the run length of zeros and add the appropriate number of 0 and
106
128
  // 17 code length codes to the code length code histogram.
107
- int reps = 1;
108
- for (int k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
129
+ uint32_t reps = 1;
130
+ for (size_t k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
109
131
  ++reps;
110
132
  }
111
133
  i += reps;
@@ -128,7 +150,7 @@ double PopulationCost(const Histogram<kSize>& histogram) {
128
150
  }
129
151
  }
130
152
  // Add the estimated encoding cost of the code length code histogram.
131
- bits += 18 + 2 * max_depth;
153
+ bits += static_cast<double>(18 + 2 * max_depth);
132
154
  // Add the entropy of the code length code histogram.
133
155
  bits += BitsEntropy(depth_histo, kCodeLengthCodes);
134
156
  return bits;