brotli 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/ext/brotli/brotli.cc +114 -24
  3. data/ext/brotli/brotli.h +0 -1
  4. data/ext/brotli/extconf.rb +30 -23
  5. data/lib/brotli/version.rb +1 -1
  6. data/vendor/brotli/LICENSE +1 -1
  7. data/vendor/brotli/dec/Makefile +1 -1
  8. data/vendor/brotli/dec/bit_reader.c +3 -3
  9. data/vendor/brotli/dec/bit_reader.h +25 -27
  10. data/vendor/brotli/dec/context.h +4 -4
  11. data/vendor/brotli/dec/decode.c +410 -486
  12. data/vendor/brotli/dec/decode.h +101 -105
  13. data/vendor/brotli/dec/dictionary.c +1 -1
  14. data/vendor/brotli/dec/dictionary.h +7 -8
  15. data/vendor/brotli/dec/huffman.c +103 -105
  16. data/vendor/brotli/dec/huffman.h +18 -18
  17. data/vendor/brotli/dec/port.h +52 -40
  18. data/vendor/brotli/dec/prefix.h +2 -0
  19. data/vendor/brotli/dec/state.c +13 -19
  20. data/vendor/brotli/dec/state.h +25 -39
  21. data/vendor/brotli/dec/transform.h +38 -44
  22. data/vendor/brotli/dec/types.h +2 -2
  23. data/vendor/brotli/enc/Makefile +1 -1
  24. data/vendor/brotli/enc/backward_references.cc +455 -359
  25. data/vendor/brotli/enc/backward_references.h +79 -3
  26. data/vendor/brotli/enc/bit_cost.h +54 -32
  27. data/vendor/brotli/enc/block_splitter.cc +285 -193
  28. data/vendor/brotli/enc/block_splitter.h +4 -12
  29. data/vendor/brotli/enc/brotli_bit_stream.cc +623 -324
  30. data/vendor/brotli/enc/brotli_bit_stream.h +76 -37
  31. data/vendor/brotli/enc/cluster.h +161 -120
  32. data/vendor/brotli/enc/command.h +60 -37
  33. data/vendor/brotli/enc/compress_fragment.cc +701 -0
  34. data/vendor/brotli/enc/compress_fragment.h +47 -0
  35. data/vendor/brotli/enc/compress_fragment_two_pass.cc +524 -0
  36. data/vendor/brotli/enc/compress_fragment_two_pass.h +40 -0
  37. data/vendor/brotli/enc/compressor.h +15 -0
  38. data/vendor/brotli/enc/context.h +1 -1
  39. data/vendor/brotli/enc/dictionary.h +2 -2
  40. data/vendor/brotli/enc/encode.cc +819 -286
  41. data/vendor/brotli/enc/encode.h +38 -15
  42. data/vendor/brotli/enc/encode_parallel.cc +40 -42
  43. data/vendor/brotli/enc/entropy_encode.cc +144 -147
  44. data/vendor/brotli/enc/entropy_encode.h +32 -8
  45. data/vendor/brotli/enc/entropy_encode_static.h +572 -0
  46. data/vendor/brotli/enc/fast_log.h +7 -40
  47. data/vendor/brotli/enc/find_match_length.h +9 -9
  48. data/vendor/brotli/enc/hash.h +462 -154
  49. data/vendor/brotli/enc/histogram.cc +6 -6
  50. data/vendor/brotli/enc/histogram.h +13 -13
  51. data/vendor/brotli/enc/literal_cost.cc +45 -45
  52. data/vendor/brotli/enc/metablock.cc +92 -89
  53. data/vendor/brotli/enc/metablock.h +12 -12
  54. data/vendor/brotli/enc/port.h +7 -16
  55. data/vendor/brotli/enc/prefix.h +23 -22
  56. data/vendor/brotli/enc/ringbuffer.h +75 -29
  57. data/vendor/brotli/enc/static_dict.cc +56 -48
  58. data/vendor/brotli/enc/static_dict.h +5 -5
  59. data/vendor/brotli/enc/streams.cc +1 -1
  60. data/vendor/brotli/enc/streams.h +5 -5
  61. data/vendor/brotli/enc/transform.h +40 -35
  62. data/vendor/brotli/enc/types.h +2 -0
  63. data/vendor/brotli/enc/utf8_util.cc +3 -2
  64. data/vendor/brotli/enc/write_bits.h +6 -6
  65. metadata +9 -5
  66. data/vendor/brotli/dec/streams.c +0 -102
  67. data/vendor/brotli/dec/streams.h +0 -95
@@ -23,17 +23,93 @@ namespace brotli {
23
23
  // by this call.
24
24
  void CreateBackwardReferences(size_t num_bytes,
25
25
  size_t position,
26
+ bool is_last,
26
27
  const uint8_t* ringbuffer,
27
28
  size_t ringbuffer_mask,
28
- const size_t max_backward_limit,
29
29
  const int quality,
30
+ const int lgwin,
30
31
  Hashers* hashers,
31
32
  int hash_type,
32
33
  int* dist_cache,
33
- int* last_insert_len,
34
+ size_t* last_insert_len,
34
35
  Command* commands,
35
36
  size_t* num_commands,
36
- int* num_literals);
37
+ size_t* num_literals);
38
+
39
+ static const float kInfinity = std::numeric_limits<float>::infinity();
40
+
41
+ struct ZopfliNode {
42
+ ZopfliNode(void) : length(1),
43
+ distance(0),
44
+ insert_length(0),
45
+ cost(kInfinity) {}
46
+
47
+ inline uint32_t copy_length() const {
48
+ return length & 0xffffff;
49
+ }
50
+
51
+ inline uint32_t length_code() const {
52
+ const uint32_t modifier = length >> 24;
53
+ return copy_length() + 9u - modifier;
54
+ }
55
+
56
+ inline uint32_t copy_distance() const {
57
+ return distance & 0x1ffffff;
58
+ }
59
+
60
+ inline uint32_t distance_code() const {
61
+ const uint32_t short_code = distance >> 25;
62
+ return short_code == 0 ? copy_distance() + 15 : short_code - 1;
63
+ }
64
+
65
+ inline uint32_t command_length() const {
66
+ return copy_length() + insert_length;
67
+ }
68
+
69
+ // best length to get up to this byte (not including this byte itself)
70
+ // the highest 8 bits are used to reconstruct the length code
71
+ uint32_t length;
72
+ // distance associated with the length
73
+ // the highest 7 bits contain the distance short code + 1 (or zero if no short code)
74
+ uint32_t distance;
75
+ // number of literal inserts before this copy
76
+ uint32_t insert_length;
77
+ // smallest cost to get to this byte from the beginning, as found so far
78
+ float cost;
79
+ };
80
+
81
+ // Computes the shortest path of commands from position to at most
82
+ // position + num_bytes.
83
+ //
84
+ // On return, path->size() is the number of commands found and path[i] is the
85
+ // length of the ith command (copy length plus insert length).
86
+ // Note that the sum of the lengths of all commands can be less than num_bytes.
87
+ //
88
+ // On return, the nodes[0..num_bytes] array will have the following
89
+ // "ZopfliNode array invariant":
90
+ // For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
91
+ // (1) nodes[i].copy_length() >= 2
92
+ // (2) nodes[i].command_length() <= i and
93
+ // (3) nodes[i - nodes[i].command_length()].cost < kInfinity
94
+ void ZopfliComputeShortestPath(size_t num_bytes,
95
+ size_t position,
96
+ const uint8_t* ringbuffer,
97
+ size_t ringbuffer_mask,
98
+ const size_t max_backward_limit,
99
+ const int* dist_cache,
100
+ Hashers::H10* hasher,
101
+ ZopfliNode* nodes,
102
+ std::vector<uint32_t>* path);
103
+
104
+ void ZopfliCreateCommands(const size_t num_bytes,
105
+ const size_t block_start,
106
+ const size_t max_backward_limit,
107
+ const std::vector<uint32_t>& path,
108
+ const ZopfliNode* nodes,
109
+ int* dist_cache,
110
+ size_t* last_insert_len,
111
+ Command* commands,
112
+ size_t* num_literals);
37
113
 
38
114
  } // namespace brotli
39
115
 
@@ -9,89 +9,111 @@
9
9
  #ifndef BROTLI_ENC_BIT_COST_H_
10
10
  #define BROTLI_ENC_BIT_COST_H_
11
11
 
12
-
13
-
14
12
  #include "./entropy_encode.h"
15
13
  #include "./fast_log.h"
16
14
  #include "./types.h"
17
15
 
18
16
  namespace brotli {
19
17
 
20
- static inline double ShannonEntropy(const int *population, int size,
21
- int *total) {
22
- int sum = 0;
18
+ static inline double ShannonEntropy(const uint32_t *population, size_t size,
19
+ size_t *total) {
20
+ size_t sum = 0;
23
21
  double retval = 0;
24
- const int *population_end = population + size;
25
- int p;
22
+ const uint32_t *population_end = population + size;
23
+ size_t p;
26
24
  if (size & 1) {
27
25
  goto odd_number_of_elements_left;
28
26
  }
29
27
  while (population < population_end) {
30
28
  p = *population++;
31
29
  sum += p;
32
- retval -= p * FastLog2(p);
30
+ retval -= static_cast<double>(p) * FastLog2(p);
33
31
  odd_number_of_elements_left:
34
32
  p = *population++;
35
33
  sum += p;
36
- retval -= p * FastLog2(p);
34
+ retval -= static_cast<double>(p) * FastLog2(p);
37
35
  }
38
- if (sum) retval += sum * FastLog2(sum);
36
+ if (sum) retval += static_cast<double>(sum) * FastLog2(sum);
39
37
  *total = sum;
40
38
  return retval;
41
39
  }
42
40
 
43
- static inline double BitsEntropy(const int *population, int size) {
44
- int sum;
41
+ static inline double BitsEntropy(const uint32_t *population, size_t size) {
42
+ size_t sum;
45
43
  double retval = ShannonEntropy(population, size, &sum);
46
44
  if (retval < sum) {
47
45
  // At least one bit per literal is needed.
48
- retval = sum;
46
+ retval = static_cast<double>(sum);
49
47
  }
50
48
  return retval;
51
49
  }
52
50
 
53
-
54
51
  template<int kSize>
55
52
  double PopulationCost(const Histogram<kSize>& histogram) {
53
+ static const double kOneSymbolHistogramCost = 12;
54
+ static const double kTwoSymbolHistogramCost = 20;
55
+ static const double kThreeSymbolHistogramCost = 28;
56
+ static const double kFourSymbolHistogramCost = 37;
56
57
  if (histogram.total_count_ == 0) {
57
- return 12;
58
+ return kOneSymbolHistogramCost;
58
59
  }
59
60
  int count = 0;
61
+ int s[5];
60
62
  for (int i = 0; i < kSize; ++i) {
61
63
  if (histogram.data_[i] > 0) {
64
+ s[count] = i;
62
65
  ++count;
66
+ if (count > 4) break;
63
67
  }
64
68
  }
65
69
  if (count == 1) {
66
- return 12;
70
+ return kOneSymbolHistogramCost;
67
71
  }
68
72
  if (count == 2) {
69
- return 20 + histogram.total_count_;
73
+ return (kTwoSymbolHistogramCost +
74
+ static_cast<double>(histogram.total_count_));
70
75
  }
71
- double bits = 0;
72
- uint8_t depth_array[kSize] = { 0 };
73
- if (count <= 4) {
74
- // For very low symbol count we build the Huffman tree.
75
- CreateHuffmanTree(&histogram.data_[0], kSize, 15, depth_array);
76
- for (int i = 0; i < kSize; ++i) {
77
- bits += histogram.data_[i] * depth_array[i];
76
+ if (count == 3) {
77
+ const uint32_t histo0 = histogram.data_[s[0]];
78
+ const uint32_t histo1 = histogram.data_[s[1]];
79
+ const uint32_t histo2 = histogram.data_[s[2]];
80
+ const uint32_t histomax = std::max(histo0, std::max(histo1, histo2));
81
+ return (kThreeSymbolHistogramCost +
82
+ 2 * (histo0 + histo1 + histo2) - histomax);
83
+ }
84
+ if (count == 4) {
85
+ uint32_t histo[4];
86
+ for (int i = 0; i < 4; ++i) {
87
+ histo[i] = histogram.data_[s[i]];
78
88
  }
79
- return count == 3 ? bits + 28 : bits + 37;
89
+ // Sort
90
+ for (int i = 0; i < 4; ++i) {
91
+ for (int j = i + 1; j < 4; ++j) {
92
+ if (histo[j] > histo[i]) {
93
+ std::swap(histo[j], histo[i]);
94
+ }
95
+ }
96
+ }
97
+ const uint32_t h23 = histo[2] + histo[3];
98
+ const uint32_t histomax = std::max(h23, histo[0]);
99
+ return (kFourSymbolHistogramCost +
100
+ 3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
80
101
  }
81
102
 
82
103
  // In this loop we compute the entropy of the histogram and simultaneously
83
104
  // build a simplified histogram of the code length codes where we use the
84
105
  // zero repeat code 17, but we don't use the non-zero repeat code 16.
85
- int max_depth = 1;
86
- int depth_histo[kCodeLengthCodes] = { 0 };
106
+ double bits = 0;
107
+ size_t max_depth = 1;
108
+ uint32_t depth_histo[kCodeLengthCodes] = { 0 };
87
109
  const double log2total = FastLog2(histogram.total_count_);
88
- for (int i = 0; i < kSize;) {
110
+ for (size_t i = 0; i < kSize;) {
89
111
  if (histogram.data_[i] > 0) {
90
112
  // Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
91
113
  // = log2(total_count) - log2(count(symbol))
92
114
  double log2p = log2total - FastLog2(histogram.data_[i]);
93
115
  // Approximate the bit depth by round(-log2(P(symbol)))
94
- int depth = static_cast<int>(log2p + 0.5);
116
+ size_t depth = static_cast<size_t>(log2p + 0.5);
95
117
  bits += histogram.data_[i] * log2p;
96
118
  if (depth > 15) {
97
119
  depth = 15;
@@ -104,8 +126,8 @@ double PopulationCost(const Histogram<kSize>& histogram) {
104
126
  } else {
105
127
  // Compute the run length of zeros and add the appropriate number of 0 and
106
128
  // 17 code length codes to the code length code histogram.
107
- int reps = 1;
108
- for (int k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
129
+ uint32_t reps = 1;
130
+ for (size_t k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
109
131
  ++reps;
110
132
  }
111
133
  i += reps;
@@ -128,7 +150,7 @@ double PopulationCost(const Histogram<kSize>& histogram) {
128
150
  }
129
151
  }
130
152
  // Add the estimated encoding cost of the code length code histogram.
131
- bits += 18 + 2 * max_depth;
153
+ bits += static_cast<double>(18 + 2 * max_depth);
132
154
  // Add the entropy of the code length code histogram.
133
155
  bits += BitsEntropy(depth_histo, kCodeLengthCodes);
134
156
  return bits;