brotli 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/ext/brotli/brotli.cc +114 -24
  3. data/ext/brotli/brotli.h +0 -1
  4. data/ext/brotli/extconf.rb +30 -23
  5. data/lib/brotli/version.rb +1 -1
  6. data/vendor/brotli/LICENSE +1 -1
  7. data/vendor/brotli/dec/Makefile +1 -1
  8. data/vendor/brotli/dec/bit_reader.c +3 -3
  9. data/vendor/brotli/dec/bit_reader.h +25 -27
  10. data/vendor/brotli/dec/context.h +4 -4
  11. data/vendor/brotli/dec/decode.c +410 -486
  12. data/vendor/brotli/dec/decode.h +101 -105
  13. data/vendor/brotli/dec/dictionary.c +1 -1
  14. data/vendor/brotli/dec/dictionary.h +7 -8
  15. data/vendor/brotli/dec/huffman.c +103 -105
  16. data/vendor/brotli/dec/huffman.h +18 -18
  17. data/vendor/brotli/dec/port.h +52 -40
  18. data/vendor/brotli/dec/prefix.h +2 -0
  19. data/vendor/brotli/dec/state.c +13 -19
  20. data/vendor/brotli/dec/state.h +25 -39
  21. data/vendor/brotli/dec/transform.h +38 -44
  22. data/vendor/brotli/dec/types.h +2 -2
  23. data/vendor/brotli/enc/Makefile +1 -1
  24. data/vendor/brotli/enc/backward_references.cc +455 -359
  25. data/vendor/brotli/enc/backward_references.h +79 -3
  26. data/vendor/brotli/enc/bit_cost.h +54 -32
  27. data/vendor/brotli/enc/block_splitter.cc +285 -193
  28. data/vendor/brotli/enc/block_splitter.h +4 -12
  29. data/vendor/brotli/enc/brotli_bit_stream.cc +623 -324
  30. data/vendor/brotli/enc/brotli_bit_stream.h +76 -37
  31. data/vendor/brotli/enc/cluster.h +161 -120
  32. data/vendor/brotli/enc/command.h +60 -37
  33. data/vendor/brotli/enc/compress_fragment.cc +701 -0
  34. data/vendor/brotli/enc/compress_fragment.h +47 -0
  35. data/vendor/brotli/enc/compress_fragment_two_pass.cc +524 -0
  36. data/vendor/brotli/enc/compress_fragment_two_pass.h +40 -0
  37. data/vendor/brotli/enc/compressor.h +15 -0
  38. data/vendor/brotli/enc/context.h +1 -1
  39. data/vendor/brotli/enc/dictionary.h +2 -2
  40. data/vendor/brotli/enc/encode.cc +819 -286
  41. data/vendor/brotli/enc/encode.h +38 -15
  42. data/vendor/brotli/enc/encode_parallel.cc +40 -42
  43. data/vendor/brotli/enc/entropy_encode.cc +144 -147
  44. data/vendor/brotli/enc/entropy_encode.h +32 -8
  45. data/vendor/brotli/enc/entropy_encode_static.h +572 -0
  46. data/vendor/brotli/enc/fast_log.h +7 -40
  47. data/vendor/brotli/enc/find_match_length.h +9 -9
  48. data/vendor/brotli/enc/hash.h +462 -154
  49. data/vendor/brotli/enc/histogram.cc +6 -6
  50. data/vendor/brotli/enc/histogram.h +13 -13
  51. data/vendor/brotli/enc/literal_cost.cc +45 -45
  52. data/vendor/brotli/enc/metablock.cc +92 -89
  53. data/vendor/brotli/enc/metablock.h +12 -12
  54. data/vendor/brotli/enc/port.h +7 -16
  55. data/vendor/brotli/enc/prefix.h +23 -22
  56. data/vendor/brotli/enc/ringbuffer.h +75 -29
  57. data/vendor/brotli/enc/static_dict.cc +56 -48
  58. data/vendor/brotli/enc/static_dict.h +5 -5
  59. data/vendor/brotli/enc/streams.cc +1 -1
  60. data/vendor/brotli/enc/streams.h +5 -5
  61. data/vendor/brotli/enc/transform.h +40 -35
  62. data/vendor/brotli/enc/types.h +2 -0
  63. data/vendor/brotli/enc/utf8_util.cc +3 -2
  64. data/vendor/brotli/enc/write_bits.h +6 -6
  65. metadata +9 -5
  66. data/vendor/brotli/dec/streams.c +0 -102
  67. data/vendor/brotli/dec/streams.h +0 -95
@@ -28,7 +28,7 @@ void BuildHistograms(
28
28
  size_t mask,
29
29
  uint8_t prev_byte,
30
30
  uint8_t prev_byte2,
31
- const std::vector<int>& context_modes,
31
+ const std::vector<ContextType>& context_modes,
32
32
  std::vector<HistogramLiteral>* literal_histograms,
33
33
  std::vector<HistogramCommand>* insert_and_copy_histograms,
34
34
  std::vector<HistogramDistance>* copy_dist_histograms) {
@@ -41,22 +41,22 @@ void BuildHistograms(
41
41
  insert_and_copy_it.Next();
42
42
  (*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
43
43
  cmd.cmd_prefix_);
44
- for (int j = 0; j < cmd.insert_len_; ++j) {
44
+ for (size_t j = cmd.insert_len_; j != 0; --j) {
45
45
  literal_it.Next();
46
- int context = (literal_it.type_ << kLiteralContextBits) +
46
+ size_t context = (literal_it.type_ << kLiteralContextBits) +
47
47
  Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
48
48
  (*literal_histograms)[context].Add(ringbuffer[pos & mask]);
49
49
  prev_byte2 = prev_byte;
50
50
  prev_byte = ringbuffer[pos & mask];
51
51
  ++pos;
52
52
  }
53
- pos += cmd.copy_len_;
54
- if (cmd.copy_len_ > 0) {
53
+ pos += cmd.copy_len();
54
+ if (cmd.copy_len()) {
55
55
  prev_byte2 = ringbuffer[(pos - 2) & mask];
56
56
  prev_byte = ringbuffer[(pos - 1) & mask];
57
57
  if (cmd.cmd_prefix_ >= 128) {
58
58
  dist_it.Next();
59
- int context = (dist_it.type_ << kDistanceContextBits) +
59
+ size_t context = (dist_it.type_ << kDistanceContextBits) +
60
60
  cmd.DistanceContext();
61
61
  (*copy_dist_histograms)[context].Add(cmd.dist_prefix_);
62
62
  }
@@ -9,10 +9,10 @@
9
9
  #ifndef BROTLI_ENC_HISTOGRAM_H_
10
10
  #define BROTLI_ENC_HISTOGRAM_H_
11
11
 
12
- #include <string.h>
12
+ #include <cstring>
13
13
  #include <limits>
14
14
  #include <vector>
15
- #include <utility>
15
+ #include "./context.h"
16
16
  #include "./command.h"
17
17
  #include "./fast_log.h"
18
18
  #include "./prefix.h"
@@ -25,37 +25,37 @@ struct BlockSplit;
25
25
  // A simple container for histograms of data in blocks.
26
26
  template<int kDataSize>
27
27
  struct Histogram {
28
- Histogram() {
28
+ Histogram(void) {
29
29
  Clear();
30
30
  }
31
- void Clear() {
31
+ void Clear(void) {
32
32
  memset(data_, 0, sizeof(data_));
33
33
  total_count_ = 0;
34
34
  bit_cost_ = std::numeric_limits<double>::infinity();
35
35
  }
36
- void Add(int val) {
36
+ void Add(size_t val) {
37
37
  ++data_[val];
38
38
  ++total_count_;
39
39
  }
40
- void Remove(int val) {
40
+ void Remove(size_t val) {
41
41
  --data_[val];
42
42
  --total_count_;
43
43
  }
44
44
  template<typename DataType>
45
45
  void Add(const DataType *p, size_t n) {
46
- total_count_ += static_cast<int>(n);
46
+ total_count_ += n;
47
47
  n += 1;
48
48
  while(--n) ++data_[*p++];
49
49
  }
50
50
  void AddHistogram(const Histogram& v) {
51
51
  total_count_ += v.total_count_;
52
- for (int i = 0; i < kDataSize; ++i) {
52
+ for (size_t i = 0; i < kDataSize; ++i) {
53
53
  data_[i] += v.data_[i];
54
54
  }
55
55
  }
56
56
 
57
- int data_[kDataSize];
58
- int total_count_;
57
+ uint32_t data_[kDataSize];
58
+ size_t total_count_;
59
59
  double bit_cost_;
60
60
  };
61
61
 
@@ -70,8 +70,8 @@ typedef Histogram<272> HistogramContextMap;
70
70
  // Block type histogram, 256 block types + 2 special symbols.
71
71
  typedef Histogram<258> HistogramBlockType;
72
72
 
73
- static const int kLiteralContextBits = 6;
74
- static const int kDistanceContextBits = 2;
73
+ static const size_t kLiteralContextBits = 6;
74
+ static const size_t kDistanceContextBits = 2;
75
75
 
76
76
  void BuildHistograms(
77
77
  const Command* cmds,
@@ -84,7 +84,7 @@ void BuildHistograms(
84
84
  size_t mask,
85
85
  uint8_t prev_byte,
86
86
  uint8_t prev_byte2,
87
- const std::vector<int>& context_modes,
87
+ const std::vector<ContextType>& context_modes,
88
88
  std::vector<HistogramLiteral>* literal_histograms,
89
89
  std::vector<HistogramCommand>* insert_and_copy_histograms,
90
90
  std::vector<HistogramDistance>* copy_dist_histograms);
@@ -17,29 +17,29 @@
17
17
 
18
18
  namespace brotli {
19
19
 
20
- static int UTF8Position(int last, int c, int clamp) {
20
+ static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
21
21
  if (c < 128) {
22
22
  return 0; // Next one is the 'Byte 1' again.
23
- } else if (c >= 192) {
24
- return std::min(1, clamp); // Next one is the 'Byte 2' of utf-8 encoding.
23
+ } else if (c >= 192) { // Next one is the 'Byte 2' of utf-8 encoding.
24
+ return std::min<size_t>(1, clamp);
25
25
  } else {
26
26
  // Let's decide over the last byte if this ends the sequence.
27
27
  if (last < 0xe0) {
28
28
  return 0; // Completed two or three byte coding.
29
- } else {
30
- return std::min(2, clamp); // Next one is the 'Byte 3' of utf-8 encoding.
29
+ } else { // Next one is the 'Byte 3' of utf-8 encoding.
30
+ return std::min<size_t>(2, clamp);
31
31
  }
32
32
  }
33
33
  }
34
34
 
35
- static int DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
36
- const uint8_t *data) {
37
- int counts[3] = { 0 };
38
- int max_utf8 = 1; // should be 2, but 1 compresses better.
39
- int last_c = 0;
40
- int utf8_pos = 0;
35
+ static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
36
+ const uint8_t *data) {
37
+ size_t counts[3] = { 0 };
38
+ size_t max_utf8 = 1; // should be 2, but 1 compresses better.
39
+ size_t last_c = 0;
40
+ size_t utf8_pos = 0;
41
41
  for (size_t i = 0; i < len; ++i) {
42
- int c = data[(pos + i) & mask];
42
+ size_t c = data[(pos + i) & mask];
43
43
  utf8_pos = UTF8Position(last_c, c, 2);
44
44
  ++counts[utf8_pos];
45
45
  last_c = c;
@@ -53,22 +53,22 @@ static int DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
53
53
  return max_utf8;
54
54
  }
55
55
 
56
- void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
57
- const uint8_t *data, float *cost) {
56
+ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
57
+ const uint8_t *data, float *cost) {
58
58
 
59
59
  // max_utf8 is 0 (normal ascii single byte modeling),
60
60
  // 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling).
61
- const int max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
62
- int histogram[3][256] = { { 0 } };
63
- int window_half = 495;
64
- int in_window = std::min(window_half, static_cast<int>(len));
65
- int in_window_utf8[3] = { 0 };
61
+ const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
62
+ size_t histogram[3][256] = { { 0 } };
63
+ size_t window_half = 495;
64
+ size_t in_window = std::min(window_half, len);
65
+ size_t in_window_utf8[3] = { 0 };
66
66
 
67
67
  // Bootstrap histograms.
68
- int last_c = 0;
69
- int utf8_pos = 0;
70
- for (int i = 0; i < in_window; ++i) {
71
- int c = data[(pos + i) & mask];
68
+ size_t last_c = 0;
69
+ size_t utf8_pos = 0;
70
+ for (size_t i = 0; i < in_window; ++i) {
71
+ size_t c = data[(pos + i) & mask];
72
72
  ++histogram[utf8_pos][c];
73
73
  ++in_window_utf8[utf8_pos];
74
74
  utf8_pos = UTF8Position(last_c, c, max_utf8);
@@ -76,30 +76,30 @@ void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
76
76
  }
77
77
 
78
78
  // Compute bit costs with sliding window.
79
- for (int i = 0; i < static_cast<int>(len); ++i) {
80
- if (i - window_half >= 0) {
79
+ for (size_t i = 0; i < len; ++i) {
80
+ if (i >= window_half) {
81
81
  // Remove a byte in the past.
82
- int c = (i - window_half - 1) < 0 ?
82
+ size_t c = i < window_half + 1 ?
83
83
  0 : data[(pos + i - window_half - 1) & mask];
84
- int last_c = (i - window_half - 2) < 0 ?
84
+ size_t last_c = i < window_half + 2 ?
85
85
  0 : data[(pos + i - window_half - 2) & mask];
86
- int utf8_pos2 = UTF8Position(last_c, c, max_utf8);
86
+ size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
87
87
  --histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
88
88
  --in_window_utf8[utf8_pos2];
89
89
  }
90
- if (i + window_half < static_cast<int>(len)) {
90
+ if (i + window_half < len) {
91
91
  // Add a byte in the future.
92
- int c = data[(pos + i + window_half - 1) & mask];
93
- int last_c = data[(pos + i + window_half - 2) & mask];
94
- int utf8_pos2 = UTF8Position(last_c, c, max_utf8);
92
+ size_t c = data[(pos + i + window_half - 1) & mask];
93
+ size_t last_c = data[(pos + i + window_half - 2) & mask];
94
+ size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
95
95
  ++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
96
96
  ++in_window_utf8[utf8_pos2];
97
97
  }
98
- int c = i < 1 ? 0 : data[(pos + i - 1) & mask];
99
- int last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
100
- int utf8_pos = UTF8Position(last_c, c, max_utf8);
98
+ size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
99
+ size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
100
+ size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
101
101
  size_t masked_pos = (pos + i) & mask;
102
- int histo = histogram[utf8_pos][data[masked_pos]];
102
+ size_t histo = histogram[utf8_pos][data[masked_pos]];
103
103
  if (histo == 0) {
104
104
  histo = 1;
105
105
  }
@@ -114,7 +114,7 @@ void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
114
114
  // rapidly in the beginning of the file, perhaps because the beginning
115
115
  // of the data is a statistical "anomaly".
116
116
  if (i < 2000) {
117
- lit_cost += 0.7 - ((2000 - i) / 2000.0 * 0.35);
117
+ lit_cost += 0.7 - (static_cast<double>(2000 - i) / 2000.0 * 0.35);
118
118
  }
119
119
  cost[i] = static_cast<float>(lit_cost);
120
120
  }
@@ -126,28 +126,28 @@ void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
126
126
  EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
127
127
  return;
128
128
  }
129
- int histogram[256] = { 0 };
130
- int window_half = 2000;
131
- int in_window = std::min(window_half, static_cast<int>(len));
129
+ size_t histogram[256] = { 0 };
130
+ size_t window_half = 2000;
131
+ size_t in_window = std::min(window_half, len);
132
132
 
133
133
  // Bootstrap histogram.
134
- for (int i = 0; i < in_window; ++i) {
134
+ for (size_t i = 0; i < in_window; ++i) {
135
135
  ++histogram[data[(pos + i) & mask]];
136
136
  }
137
137
 
138
138
  // Compute bit costs with sliding window.
139
- for (int i = 0; i < static_cast<int>(len); ++i) {
140
- if (i - window_half >= 0) {
139
+ for (size_t i = 0; i < len; ++i) {
140
+ if (i >= window_half) {
141
141
  // Remove a byte in the past.
142
142
  --histogram[data[(pos + i - window_half) & mask]];
143
143
  --in_window;
144
144
  }
145
- if (i + window_half < static_cast<int>(len)) {
145
+ if (i + window_half < len) {
146
146
  // Add a byte in the future.
147
147
  ++histogram[data[(pos + i + window_half) & mask]];
148
148
  ++in_window;
149
149
  }
150
- int histo = histogram[data[(pos + i) & mask]];
150
+ size_t histo = histogram[data[(pos + i) & mask]];
151
151
  if (histo == 0) {
152
152
  histo = 1;
153
153
  }