brotli 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. checksums.yaml +4 -4
  2. data/.gitmodules +1 -1
  3. data/.travis.yml +2 -1
  4. data/README.md +1 -1
  5. data/Rakefile +1 -1
  6. data/ext/brotli/brotli.cc +1 -1
  7. data/ext/brotli/extconf.rb +72 -14
  8. data/lib/brotli/version.rb +1 -1
  9. data/vendor/brotli/LICENSE +19 -202
  10. data/vendor/brotli/dec/Makefile +1 -1
  11. data/vendor/brotli/dec/bit_reader.c +23 -30
  12. data/vendor/brotli/dec/bit_reader.h +270 -141
  13. data/vendor/brotli/dec/context.h +3 -12
  14. data/vendor/brotli/dec/decode.c +1813 -1048
  15. data/vendor/brotli/dec/decode.h +22 -16
  16. data/vendor/brotli/dec/dictionary.c +9466 -0
  17. data/vendor/brotli/dec/dictionary.h +6 -9461
  18. data/vendor/brotli/dec/huffman.c +104 -71
  19. data/vendor/brotli/dec/huffman.h +19 -28
  20. data/vendor/brotli/dec/port.h +124 -32
  21. data/vendor/brotli/dec/prefix.h +4 -13
  22. data/vendor/brotli/dec/state.c +93 -56
  23. data/vendor/brotli/dec/state.h +124 -53
  24. data/vendor/brotli/dec/streams.c +14 -11
  25. data/vendor/brotli/dec/streams.h +6 -11
  26. data/vendor/brotli/dec/transform.h +2 -11
  27. data/vendor/brotli/dec/types.h +21 -19
  28. data/vendor/brotli/enc/Makefile +4 -1
  29. data/vendor/brotli/enc/backward_references.cc +87 -94
  30. data/vendor/brotli/enc/backward_references.h +8 -18
  31. data/vendor/brotli/enc/bit_cost.h +11 -19
  32. data/vendor/brotli/enc/block_splitter.cc +43 -48
  33. data/vendor/brotli/enc/block_splitter.h +7 -16
  34. data/vendor/brotli/enc/brotli_bit_stream.cc +48 -50
  35. data/vendor/brotli/enc/brotli_bit_stream.h +7 -16
  36. data/vendor/brotli/enc/cluster.h +24 -25
  37. data/vendor/brotli/enc/command.h +34 -41
  38. data/vendor/brotli/enc/context.h +11 -18
  39. data/vendor/brotli/enc/dictionary.cc +9466 -0
  40. data/vendor/brotli/enc/dictionary.h +20 -9464
  41. data/vendor/brotli/enc/dictionary_hash.h +7 -15
  42. data/vendor/brotli/enc/encode.cc +80 -148
  43. data/vendor/brotli/enc/encode.h +19 -29
  44. data/vendor/brotli/enc/encode_parallel.cc +35 -108
  45. data/vendor/brotli/enc/encode_parallel.h +7 -16
  46. data/vendor/brotli/enc/entropy_encode.cc +33 -42
  47. data/vendor/brotli/enc/entropy_encode.h +8 -16
  48. data/vendor/brotli/enc/fast_log.h +8 -15
  49. data/vendor/brotli/enc/find_match_length.h +7 -17
  50. data/vendor/brotli/enc/hash.h +130 -150
  51. data/vendor/brotli/enc/histogram.cc +7 -16
  52. data/vendor/brotli/enc/histogram.h +11 -17
  53. data/vendor/brotli/enc/literal_cost.cc +28 -35
  54. data/vendor/brotli/enc/literal_cost.h +9 -23
  55. data/vendor/brotli/enc/metablock.cc +18 -26
  56. data/vendor/brotli/enc/metablock.h +6 -14
  57. data/vendor/brotli/enc/port.h +14 -14
  58. data/vendor/brotli/enc/prefix.h +11 -18
  59. data/vendor/brotli/enc/ringbuffer.h +18 -27
  60. data/vendor/brotli/enc/static_dict.cc +7 -1
  61. data/vendor/brotli/enc/static_dict.h +7 -15
  62. data/vendor/brotli/enc/static_dict_lut.h +7 -15
  63. data/vendor/brotli/enc/streams.cc +15 -28
  64. data/vendor/brotli/enc/streams.h +27 -35
  65. data/vendor/brotli/enc/transform.h +9 -16
  66. data/vendor/brotli/enc/types.h +27 -0
  67. data/vendor/brotli/enc/utf8_util.cc +82 -0
  68. data/vendor/brotli/enc/utf8_util.h +25 -0
  69. data/vendor/brotli/enc/write_bits.h +11 -18
  70. metadata +7 -2
@@ -1,24 +1,15 @@
1
- // Copyright 2013 Google Inc. All Rights Reserved.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
- //
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
15
7
  // Functions for clustering similar histograms together.
16
8
 
17
9
  #ifndef BROTLI_ENC_CLUSTER_H_
18
10
  #define BROTLI_ENC_CLUSTER_H_
19
11
 
20
12
  #include <math.h>
21
- #include <stdint.h>
22
13
  #include <stdio.h>
23
14
  #include <algorithm>
24
15
  #include <complex>
@@ -31,6 +22,8 @@
31
22
  #include "./entropy_encode.h"
32
23
  #include "./fast_log.h"
33
24
  #include "./histogram.h"
25
+ #include "./port.h"
26
+ #include "./types.h"
34
27
 
35
28
  namespace brotli {
36
29
 
@@ -111,22 +104,25 @@ void HistogramCombine(HistogramType* out,
111
104
  int* cluster_size,
112
105
  int* symbols,
113
106
  int symbols_size,
114
- int max_clusters) {
107
+ size_t max_clusters) {
115
108
  double cost_diff_threshold = 0.0;
116
- int min_cluster_size = 1;
109
+ size_t min_cluster_size = 1;
117
110
  std::set<int> all_symbols;
118
111
  std::vector<int> clusters;
119
112
  for (int i = 0; i < symbols_size; ++i) {
120
113
  if (all_symbols.find(symbols[i]) == all_symbols.end()) {
121
114
  all_symbols.insert(symbols[i]);
115
+ if (!clusters.empty()) {
116
+ BROTLI_DCHECK(clusters.back() < symbols[i]);
117
+ }
122
118
  clusters.push_back(symbols[i]);
123
119
  }
124
120
  }
125
121
 
126
122
  // We maintain a heap of histogram pairs, ordered by the bit cost reduction.
127
123
  std::vector<HistogramPair> pairs;
128
- for (int idx1 = 0; idx1 < clusters.size(); ++idx1) {
129
- for (int idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) {
124
+ for (size_t idx1 = 0; idx1 < clusters.size(); ++idx1) {
125
+ for (size_t idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) {
130
126
  CompareAndPushToHeap(out, cluster_size, clusters[idx1], clusters[idx2],
131
127
  &pairs);
132
128
  }
@@ -149,14 +145,14 @@ void HistogramCombine(HistogramType* out,
149
145
  symbols[i] = best_idx1;
150
146
  }
151
147
  }
152
- for (int i = 0; i + 1 < clusters.size(); ++i) {
148
+ for (size_t i = 0; i + 1 < clusters.size(); ++i) {
153
149
  if (clusters[i] >= best_idx2) {
154
150
  clusters[i] = clusters[i + 1];
155
151
  }
156
152
  }
157
153
  clusters.pop_back();
158
154
  // Invalidate pairs intersecting the just combined best pair.
159
- for (int i = 0; i < pairs.size(); ++i) {
155
+ for (size_t i = 0; i < pairs.size(); ++i) {
160
156
  HistogramPair& p = pairs[i];
161
157
  if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
162
158
  p.idx1 == best_idx2 || p.idx2 == best_idx2) {
@@ -169,7 +165,7 @@ void HistogramCombine(HistogramType* out,
169
165
  pairs.pop_back();
170
166
  }
171
167
  // Push new pairs formed with the combined histogram to the heap.
172
- for (int i = 0; i < clusters.size(); ++i) {
168
+ for (size_t i = 0; i < clusters.size(); ++i) {
173
169
  CompareAndPushToHeap(out, cluster_size, best_idx1, clusters[i], &pairs);
174
170
  }
175
171
  }
@@ -232,7 +228,7 @@ void HistogramReindex(std::vector<HistogramType>* out,
232
228
  std::vector<HistogramType> tmp(*out);
233
229
  std::map<int, int> new_index;
234
230
  int next_index = 0;
235
- for (int i = 0; i < symbols->size(); ++i) {
231
+ for (size_t i = 0; i < symbols->size(); ++i) {
236
232
  if (new_index.find((*symbols)[i]) == new_index.end()) {
237
233
  new_index[(*symbols)[i]] = next_index;
238
234
  (*out)[next_index] = tmp[(*symbols)[i]];
@@ -240,7 +236,7 @@ void HistogramReindex(std::vector<HistogramType>* out,
240
236
  }
241
237
  }
242
238
  out->resize(next_index);
243
- for (int i = 0; i < symbols->size(); ++i) {
239
+ for (size_t i = 0; i < symbols->size(); ++i) {
244
240
  (*symbols)[i] = new_index[(*symbols)[i]];
245
241
  }
246
242
  }
@@ -251,10 +247,11 @@ void HistogramReindex(std::vector<HistogramType>* out,
251
247
  template<typename HistogramType>
252
248
  void ClusterHistograms(const std::vector<HistogramType>& in,
253
249
  int num_contexts, int num_blocks,
254
- int max_histograms,
250
+ size_t max_histograms,
255
251
  std::vector<HistogramType>* out,
256
252
  std::vector<int>* histogram_symbols) {
257
253
  const int in_size = num_contexts * num_blocks;
254
+ BROTLI_DCHECK(in_size == in.size());
258
255
  std::vector<int> cluster_size(in_size, 1);
259
256
  out->resize(in_size);
260
257
  histogram_symbols->resize(in_size);
@@ -264,6 +261,7 @@ void ClusterHistograms(const std::vector<HistogramType>& in,
264
261
  (*histogram_symbols)[i] = i;
265
262
  }
266
263
 
264
+
267
265
  const int max_input_histograms = 64;
268
266
  for (int i = 0; i < in_size; i += max_input_histograms) {
269
267
  int num_to_combine = std::min(in_size - i, max_input_histograms);
@@ -282,6 +280,7 @@ void ClusterHistograms(const std::vector<HistogramType>& in,
282
280
 
283
281
  // Convert the context map to a canonical form.
284
282
  HistogramReindex(out, histogram_symbols);
283
+
285
284
  }
286
285
 
287
286
 
@@ -1,25 +1,17 @@
1
- // Copyright 2013 Google Inc. All Rights Reserved.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
- //
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
15
7
  // This class models a sequence of literals and a backward reference copy.
16
8
 
17
9
  #ifndef BROTLI_ENC_COMMAND_H_
18
10
  #define BROTLI_ENC_COMMAND_H_
19
11
 
20
- #include <stdint.h>
21
12
  #include "./fast_log.h"
22
13
  #include "./prefix.h"
14
+ #include "./types.h"
23
15
 
24
16
  namespace brotli {
25
17
 
@@ -32,66 +24,67 @@ static int copybase[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30, 38,
32
24
  static int copyextra[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4,
33
25
  4, 5, 5, 6, 7, 8, 9, 10, 24 };
34
26
 
35
- static inline int GetInsertLengthCode(int insertlen) {
27
+ static inline uint16_t GetInsertLengthCode(int insertlen) {
36
28
  if (insertlen < 6) {
37
- return insertlen;
29
+ return static_cast<uint16_t>(insertlen);
38
30
  } else if (insertlen < 130) {
39
31
  insertlen -= 2;
40
32
  int nbits = Log2FloorNonZero(insertlen) - 1;
41
- return (nbits << 1) + (insertlen >> nbits) + 2;
33
+ return static_cast<uint16_t>((nbits << 1) + (insertlen >> nbits) + 2);
42
34
  } else if (insertlen < 2114) {
43
- return Log2FloorNonZero(insertlen - 66) + 10;
35
+ return static_cast<uint16_t>(Log2FloorNonZero(insertlen - 66) + 10);
44
36
  } else if (insertlen < 6210) {
45
- return 21;
37
+ return 21u;
46
38
  } else if (insertlen < 22594) {
47
- return 22;
39
+ return 22u;
48
40
  } else {
49
- return 23;
41
+ return 23u;
50
42
  }
51
43
  }
52
44
 
53
- static inline int GetCopyLengthCode(int copylen) {
45
+ static inline uint16_t GetCopyLengthCode(int copylen) {
54
46
  if (copylen < 10) {
55
- return copylen - 2;
47
+ return static_cast<uint16_t>(copylen - 2);
56
48
  } else if (copylen < 134) {
57
49
  copylen -= 6;
58
50
  int nbits = Log2FloorNonZero(copylen) - 1;
59
- return (nbits << 1) + (copylen >> nbits) + 4;
51
+ return static_cast<uint16_t>((nbits << 1) + (copylen >> nbits) + 4);
60
52
  } else if (copylen < 2118) {
61
- return Log2FloorNonZero(copylen - 70) + 12;
53
+ return static_cast<uint16_t>(Log2FloorNonZero(copylen - 70) + 12);
62
54
  } else {
63
- return 23;
55
+ return 23u;
64
56
  }
65
57
  }
66
58
 
67
- static inline int CombineLengthCodes(
68
- int inscode, int copycode, int distancecode) {
69
- int bits64 = (copycode & 0x7u) | ((inscode & 0x7u) << 3);
70
- if (distancecode == 0 && inscode < 8 && copycode < 16) {
59
+ static inline uint16_t CombineLengthCodes(
60
+ uint16_t inscode, uint16_t copycode, bool use_last_distance) {
61
+ uint16_t bits64 =
62
+ static_cast<uint16_t>((copycode & 0x7u) | ((inscode & 0x7u) << 3));
63
+ if (use_last_distance && inscode < 8 && copycode < 16) {
71
64
  return (copycode < 8) ? bits64 : (bits64 | 64);
72
65
  } else {
73
66
  // "To convert an insert-and-copy length code to an insert length code and
74
67
  // a copy length code, the following table can be used"
75
- static const int cells[9] = { 2, 3, 6, 4, 5, 8, 7, 9, 10 };
76
- return (cells[(copycode >> 3) + 3 * (inscode >> 3)] << 6) | bits64;
68
+ static const uint16_t cells[9] = { 128u, 192u, 384u, 256u, 320u, 512u,
69
+ 448u, 576u, 640u };
70
+ return cells[(copycode >> 3) + 3 * (inscode >> 3)] | bits64;
77
71
  }
78
72
  }
79
73
 
80
- static inline void GetLengthCode(int insertlen, int copylen, int distancecode,
74
+ static inline void GetLengthCode(int insertlen, int copylen,
75
+ bool use_last_distance,
81
76
  uint16_t* code, uint64_t* extra) {
82
- int inscode = GetInsertLengthCode(insertlen);
83
- int copycode = GetCopyLengthCode(copylen);
77
+ uint16_t inscode = GetInsertLengthCode(insertlen);
78
+ uint16_t copycode = GetCopyLengthCode(copylen);
84
79
  uint64_t insnumextra = insextra[inscode];
85
80
  uint64_t numextra = insnumextra + copyextra[copycode];
86
81
  uint64_t insextraval = insertlen - insbase[inscode];
87
82
  uint64_t copyextraval = copylen - copybase[copycode];
88
- *code = CombineLengthCodes(inscode, copycode, distancecode);
83
+ *code = CombineLengthCodes(inscode, copycode, use_last_distance);
89
84
  *extra = (numextra << 48) | (copyextraval << insnumextra) | insextraval;
90
85
  }
91
86
 
92
87
  struct Command {
93
- Command() {}
94
-
95
88
  // distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1.
96
89
  Command(int insertlen, int copylen, int copylen_code, int distance_code)
97
90
  : insert_len_(insertlen), copy_len_(copylen) {
@@ -99,13 +92,13 @@ struct Command {
99
92
  // npostfix and ndirect were 0, they are only recomputed later after the
100
93
  // clustering if needed.
101
94
  PrefixEncodeCopyDistance(distance_code, 0, 0, &dist_prefix_, &dist_extra_);
102
- GetLengthCode(insertlen, copylen_code, dist_prefix_,
95
+ GetLengthCode(insertlen, copylen_code, dist_prefix_ == 0,
103
96
  &cmd_prefix_, &cmd_extra_);
104
97
  }
105
98
 
106
99
  Command(int insertlen)
107
100
  : insert_len_(insertlen), copy_len_(0), dist_prefix_(16), dist_extra_(0) {
108
- GetLengthCode(insertlen, 4, dist_prefix_, &cmd_prefix_, &cmd_extra_);
101
+ GetLengthCode(insertlen, 4, dist_prefix_ == 0, &cmd_prefix_, &cmd_extra_);
109
102
  }
110
103
 
111
104
  int DistanceCode() const {
@@ -1,29 +1,21 @@
1
- // Copyright 2013 Google Inc. All Rights Reserved.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
- //
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
15
7
  // Functions to map previous bytes into a context id.
16
8
 
17
9
  #ifndef BROTLI_ENC_CONTEXT_H_
18
10
  #define BROTLI_ENC_CONTEXT_H_
19
11
 
20
- #include <stdint.h>
12
+ #include "./types.h"
21
13
 
22
14
  namespace brotli {
23
15
 
24
16
  // Second-order context lookup table for UTF8 byte streams.
25
17
  //
26
- // If p1 and p2 are the previous two bytes, we calcualte the context as
18
+ // If p1 and p2 are the previous two bytes, we calculate the context as
27
19
  //
28
20
  // context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].
29
21
  //
@@ -170,11 +162,12 @@ static inline uint8_t Context(uint8_t p1, uint8_t p2, int mode) {
170
162
  case CONTEXT_LSB6:
171
163
  return p1 & 0x3f;
172
164
  case CONTEXT_MSB6:
173
- return p1 >> 2;
165
+ return static_cast<uint8_t>(p1 >> 2);
174
166
  case CONTEXT_UTF8:
175
167
  return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];
176
168
  case CONTEXT_SIGNED:
177
- return (kSigned3BitContextLookup[p1] << 3) + kSigned3BitContextLookup[p2];
169
+ return static_cast<uint8_t>((kSigned3BitContextLookup[p1] << 3) +
170
+ kSigned3BitContextLookup[p2]);
178
171
  default:
179
172
  return 0;
180
173
  }