brotli 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/ext/brotli/brotli.cc +114 -24
  3. data/ext/brotli/brotli.h +0 -1
  4. data/ext/brotli/extconf.rb +30 -23
  5. data/lib/brotli/version.rb +1 -1
  6. data/vendor/brotli/LICENSE +1 -1
  7. data/vendor/brotli/dec/Makefile +1 -1
  8. data/vendor/brotli/dec/bit_reader.c +3 -3
  9. data/vendor/brotli/dec/bit_reader.h +25 -27
  10. data/vendor/brotli/dec/context.h +4 -4
  11. data/vendor/brotli/dec/decode.c +410 -486
  12. data/vendor/brotli/dec/decode.h +101 -105
  13. data/vendor/brotli/dec/dictionary.c +1 -1
  14. data/vendor/brotli/dec/dictionary.h +7 -8
  15. data/vendor/brotli/dec/huffman.c +103 -105
  16. data/vendor/brotli/dec/huffman.h +18 -18
  17. data/vendor/brotli/dec/port.h +52 -40
  18. data/vendor/brotli/dec/prefix.h +2 -0
  19. data/vendor/brotli/dec/state.c +13 -19
  20. data/vendor/brotli/dec/state.h +25 -39
  21. data/vendor/brotli/dec/transform.h +38 -44
  22. data/vendor/brotli/dec/types.h +2 -2
  23. data/vendor/brotli/enc/Makefile +1 -1
  24. data/vendor/brotli/enc/backward_references.cc +455 -359
  25. data/vendor/brotli/enc/backward_references.h +79 -3
  26. data/vendor/brotli/enc/bit_cost.h +54 -32
  27. data/vendor/brotli/enc/block_splitter.cc +285 -193
  28. data/vendor/brotli/enc/block_splitter.h +4 -12
  29. data/vendor/brotli/enc/brotli_bit_stream.cc +623 -324
  30. data/vendor/brotli/enc/brotli_bit_stream.h +76 -37
  31. data/vendor/brotli/enc/cluster.h +161 -120
  32. data/vendor/brotli/enc/command.h +60 -37
  33. data/vendor/brotli/enc/compress_fragment.cc +701 -0
  34. data/vendor/brotli/enc/compress_fragment.h +47 -0
  35. data/vendor/brotli/enc/compress_fragment_two_pass.cc +524 -0
  36. data/vendor/brotli/enc/compress_fragment_two_pass.h +40 -0
  37. data/vendor/brotli/enc/compressor.h +15 -0
  38. data/vendor/brotli/enc/context.h +1 -1
  39. data/vendor/brotli/enc/dictionary.h +2 -2
  40. data/vendor/brotli/enc/encode.cc +819 -286
  41. data/vendor/brotli/enc/encode.h +38 -15
  42. data/vendor/brotli/enc/encode_parallel.cc +40 -42
  43. data/vendor/brotli/enc/entropy_encode.cc +144 -147
  44. data/vendor/brotli/enc/entropy_encode.h +32 -8
  45. data/vendor/brotli/enc/entropy_encode_static.h +572 -0
  46. data/vendor/brotli/enc/fast_log.h +7 -40
  47. data/vendor/brotli/enc/find_match_length.h +9 -9
  48. data/vendor/brotli/enc/hash.h +462 -154
  49. data/vendor/brotli/enc/histogram.cc +6 -6
  50. data/vendor/brotli/enc/histogram.h +13 -13
  51. data/vendor/brotli/enc/literal_cost.cc +45 -45
  52. data/vendor/brotli/enc/metablock.cc +92 -89
  53. data/vendor/brotli/enc/metablock.h +12 -12
  54. data/vendor/brotli/enc/port.h +7 -16
  55. data/vendor/brotli/enc/prefix.h +23 -22
  56. data/vendor/brotli/enc/ringbuffer.h +75 -29
  57. data/vendor/brotli/enc/static_dict.cc +56 -48
  58. data/vendor/brotli/enc/static_dict.h +5 -5
  59. data/vendor/brotli/enc/streams.cc +1 -1
  60. data/vendor/brotli/enc/streams.h +5 -5
  61. data/vendor/brotli/enc/transform.h +40 -35
  62. data/vendor/brotli/enc/types.h +2 -0
  63. data/vendor/brotli/enc/utf8_util.cc +3 -2
  64. data/vendor/brotli/enc/write_bits.h +6 -6
  65. metadata +9 -5
  66. data/vendor/brotli/dec/streams.c +0 -102
  67. data/vendor/brotli/dec/streams.h +0 -95
@@ -26,7 +26,7 @@ static const int kMinInputBlockBits = 16;
26
26
  static const int kMaxInputBlockBits = 24;
27
27
 
28
28
  struct BrotliParams {
29
- BrotliParams()
29
+ BrotliParams(void)
30
30
  : mode(MODE_GENERIC),
31
31
  quality(11),
32
32
  lgwin(22),
@@ -68,10 +68,10 @@ struct BrotliParams {
68
68
  class BrotliCompressor {
69
69
  public:
70
70
  explicit BrotliCompressor(BrotliParams params);
71
- ~BrotliCompressor();
71
+ ~BrotliCompressor(void);
72
72
 
73
73
  // The maximum input size that can be processed at once.
74
- size_t input_block_size() const { return size_t(1) << params_.lgblock; }
74
+ size_t input_block_size(void) const { return size_t(1) << params_.lgblock; }
75
75
 
76
76
  // Encodes the data in input_buffer as a meta-block and writes it to
77
77
  // encoded_buffer (*encoded_size should be set to the size of
@@ -115,9 +115,9 @@ class BrotliCompressor {
115
115
  // the new output meta-block, or to zero if no new output meta-block was
116
116
  // created (in this case the processed input data is buffered internally).
117
117
  // If *out_size is positive, *output points to the start of the output data.
118
- // Returns false if the size of the input data is larger than
119
- // input_block_size() or if there was an error during writing the output.
120
118
  // If is_last or force_flush is true, an output meta-block is always created.
119
+ // Returns false if the size of the input data is larger than
120
+ // input_block_size().
121
121
  bool WriteBrotliData(const bool is_last, const bool force_flush,
122
122
  size_t* out_size, uint8_t** output);
123
123
 
@@ -129,28 +129,29 @@ class BrotliCompressor {
129
129
  void BrotliSetCustomDictionary(size_t size, const uint8_t* dict);
130
130
 
131
131
  // No-op, but we keep it here for API backward-compatibility.
132
- void WriteStreamHeader() {}
132
+ void WriteStreamHeader(void) {}
133
133
 
134
134
  private:
135
135
  uint8_t* GetBrotliStorage(size_t size);
136
136
 
137
- bool WriteMetaBlockInternal(const bool is_last,
138
- size_t* out_size,
139
- uint8_t** output);
137
+ // Allocates and clears a hash table using memory in "*this",
138
+ // stores the number of buckets in "*table_size" and returns a pointer to
139
+ // the base of the hash table.
140
+ int* GetHashTable(int quality,
141
+ size_t input_size, size_t* table_size);
140
142
 
141
143
  BrotliParams params_;
142
- int max_backward_distance_;
143
144
  Hashers* hashers_;
144
145
  int hash_type_;
145
- size_t input_pos_;
146
+ uint64_t input_pos_;
146
147
  RingBuffer* ringbuffer_;
147
148
  size_t cmd_alloc_size_;
148
149
  Command* commands_;
149
150
  size_t num_commands_;
150
- int num_literals_;
151
- int last_insert_len_;
152
- size_t last_flush_pos_;
153
- size_t last_processed_pos_;
151
+ size_t num_literals_;
152
+ size_t last_insert_len_;
153
+ uint64_t last_flush_pos_;
154
+ uint64_t last_processed_pos_;
154
155
  int dist_cache_[4];
155
156
  int saved_dist_cache_[4];
156
157
  uint8_t last_byte_;
@@ -159,6 +160,28 @@ class BrotliCompressor {
159
160
  uint8_t prev_byte2_;
160
161
  size_t storage_size_;
161
162
  uint8_t* storage_;
163
+ // Hash table for quality 0 mode.
164
+ int small_table_[1 << 10]; // 2KB
165
+ int* large_table_; // Allocated only when needed
166
+ // Command and distance prefix codes (each 64 symbols, stored back-to-back)
167
+ // used for the next block in quality 0. The command prefix code is over a
168
+ // smaller alphabet with the following 64 symbols:
169
+ // 0 - 15: insert length code 0, copy length code 0 - 15, same distance
170
+ // 16 - 39: insert length code 0, copy length code 0 - 23
171
+ // 40 - 63: insert length code 0 - 23, copy length code 0
172
+ // Note that symbols 16 and 40 represent the same code in the full alphabet,
173
+ // but we do not use either of them in quality 0.
174
+ uint8_t cmd_depths_[128];
175
+ uint16_t cmd_bits_[128];
176
+ // The compressed form of the command and distance prefix codes for the next
177
+ // block in quality 0.
178
+ uint8_t cmd_code_[512];
179
+ size_t cmd_code_numbits_;
180
+ // Command and literal buffers for quality 1.
181
+ uint32_t* command_buf_;
182
+ uint8_t* literal_buf_;
183
+
184
+ int is_last_block_emitted_;
162
185
  };
163
186
 
164
187
  // Compresses the data in input_buffer into encoded_buffer, and sets
@@ -32,15 +32,15 @@ namespace brotli {
32
32
  namespace {
33
33
 
34
34
  void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
35
- int num_direct_distance_codes,
36
- int distance_postfix_bits) {
35
+ uint32_t num_direct_distance_codes,
36
+ uint32_t distance_postfix_bits) {
37
37
  if (num_direct_distance_codes == 0 &&
38
38
  distance_postfix_bits == 0) {
39
39
  return;
40
40
  }
41
41
  for (size_t i = 0; i < num_commands; ++i) {
42
42
  Command* cmd = &cmds[i];
43
- if (cmd->copy_len_ > 0 && cmd->cmd_prefix_ >= 128) {
43
+ if (cmd->copy_len() && cmd->cmd_prefix_ >= 128) {
44
44
  PrefixEncodeCopyDistance(cmd->DistanceCode(),
45
45
  num_direct_distance_codes,
46
46
  distance_postfix_bits,
@@ -51,21 +51,20 @@ void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
51
51
  }
52
52
 
53
53
  bool WriteMetaBlockParallel(const BrotliParams& params,
54
- const size_t block_size,
54
+ const uint32_t input_size,
55
55
  const uint8_t* input_buffer,
56
- const size_t prefix_size,
56
+ const uint32_t prefix_size,
57
57
  const uint8_t* prefix_buffer,
58
58
  const bool is_first,
59
59
  const bool is_last,
60
60
  size_t* encoded_size,
61
61
  uint8_t* encoded_buffer) {
62
- if (block_size == 0) {
62
+ if (input_size == 0) {
63
63
  return false;
64
64
  }
65
- const size_t input_size = block_size;
66
65
 
67
66
  // Copy prefix + next input block into a continuous area.
68
- size_t input_pos = prefix_size;
67
+ uint32_t input_pos = prefix_size;
69
68
  // CreateBackwardReferences reads up to 3 bytes past the end of input if the
70
69
  // mask points past the end of input.
71
70
  // FindMatchLengthWithLimit could do another 8 bytes look-forward.
@@ -75,7 +74,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
75
74
  // Since we don't have a ringbuffer, masking is a no-op.
76
75
  // We use one less bit than the full range because some of the code uses
77
76
  // mask + 1 as the size of the ringbuffer.
78
- const size_t mask = std::numeric_limits<size_t>::max() >> 1;
77
+ const uint32_t mask = std::numeric_limits<uint32_t>::max() >> 1;
79
78
 
80
79
  uint8_t prev_byte = input_pos > 0 ? input[(input_pos - 1) & mask] : 0;
81
80
  uint8_t prev_byte2 = input_pos > 1 ? input[(input_pos - 2) & mask] : 0;
@@ -86,15 +85,14 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
86
85
  kMinUTF8Ratio);
87
86
 
88
87
  // Initialize hashers.
89
- int hash_type = std::min(9, params.quality);
88
+ int hash_type = std::min(10, params.quality);
90
89
  Hashers* hashers = new Hashers();
91
90
  hashers->Init(hash_type);
92
91
 
93
92
  // Compute backward references.
94
- int last_insert_len = 0;
93
+ size_t last_insert_len = 0;
95
94
  size_t num_commands = 0;
96
- int num_literals = 0;
97
- int max_backward_distance = (1 << params.lgwin) - 16;
95
+ size_t num_literals = 0;
98
96
  int dist_cache[4] = { -4, -4, -4, -4 };
99
97
  Command* commands = static_cast<Command*>(
100
98
  malloc(sizeof(Command) * ((input_size + 1) >> 1)));
@@ -103,10 +101,10 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
103
101
  return false;
104
102
  }
105
103
  CreateBackwardReferences(
106
- input_size, input_pos,
104
+ input_size, input_pos, is_last,
107
105
  &input[0], mask,
108
- max_backward_distance,
109
106
  params.quality,
107
+ params.lgwin,
110
108
  hashers,
111
109
  hash_type,
112
110
  dist_cache,
@@ -123,10 +121,11 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
123
121
 
124
122
  // Build the meta-block.
125
123
  MetaBlockSplit mb;
126
- int num_direct_distance_codes =
124
+ uint32_t num_direct_distance_codes =
127
125
  params.mode == BrotliParams::MODE_FONT ? 12 : 0;
128
- int distance_postfix_bits = params.mode == BrotliParams::MODE_FONT ? 1 : 0;
129
- int literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
126
+ uint32_t distance_postfix_bits =
127
+ params.mode == BrotliParams::MODE_FONT ? 1 : 0;
128
+ ContextType literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
130
129
  RecomputeDistancePrefixes(commands, num_commands,
131
130
  num_direct_distance_codes,
132
131
  distance_postfix_bits);
@@ -145,8 +144,8 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
145
144
  // Set up the temporary output storage.
146
145
  const size_t max_out_size = 2 * input_size + 500;
147
146
  std::vector<uint8_t> storage(max_out_size);
148
- int first_byte = 0;
149
- int first_byte_bits = 0;
147
+ uint8_t first_byte = 0;
148
+ size_t first_byte_bits = 0;
150
149
  if (is_first) {
151
150
  if (params.lgwin == 16) {
152
151
  first_byte = 0;
@@ -155,26 +154,23 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
155
154
  first_byte = 1;
156
155
  first_byte_bits = 7;
157
156
  } else {
158
- first_byte = ((params.lgwin - 17) << 1) | 1;
157
+ first_byte = static_cast<uint8_t>(((params.lgwin - 17) << 1) | 1);
159
158
  first_byte_bits = 4;
160
159
  }
161
160
  }
162
161
  storage[0] = static_cast<uint8_t>(first_byte);
163
- int storage_ix = first_byte_bits;
162
+ size_t storage_ix = first_byte_bits;
164
163
 
165
164
  // Store the meta-block to the temporary output.
166
- if (!StoreMetaBlock(&input[0], input_pos, input_size, mask,
167
- prev_byte, prev_byte2,
168
- is_last,
169
- num_direct_distance_codes,
170
- distance_postfix_bits,
171
- literal_context_mode,
172
- commands, num_commands,
173
- mb,
174
- &storage_ix, &storage[0])) {
175
- free(commands);
176
- return false;
177
- }
165
+ StoreMetaBlock(&input[0], input_pos, input_size, mask,
166
+ prev_byte, prev_byte2,
167
+ is_last,
168
+ num_direct_distance_codes,
169
+ distance_postfix_bits,
170
+ literal_context_mode,
171
+ commands, num_commands,
172
+ mb,
173
+ &storage_ix, &storage[0]);
178
174
  free(commands);
179
175
 
180
176
  // If this is not the last meta-block, store an empty metadata
@@ -189,11 +185,9 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
189
185
  if (input_size + 4 < output_size) {
190
186
  storage[0] = static_cast<uint8_t>(first_byte);
191
187
  storage_ix = first_byte_bits;
192
- if (!StoreUncompressedMetaBlock(is_last, &input[0], input_pos, mask,
193
- input_size,
194
- &storage_ix, &storage[0])) {
195
- return false;
196
- }
188
+ StoreUncompressedMetaBlock(is_last, &input[0], input_pos, mask,
189
+ input_size,
190
+ &storage_ix, &storage[0]);
197
191
  output_size = storage_ix >> 3;
198
192
  }
199
193
 
@@ -239,19 +233,23 @@ int BrotliCompressBufferParallel(BrotliParams params,
239
233
  params.lgblock = kMaxInputBlockBits;
240
234
  }
241
235
  size_t max_input_block_size = 1 << params.lgblock;
236
+ size_t max_prefix_size = 1u << params.lgwin;
242
237
 
243
238
  std::vector<std::vector<uint8_t> > compressed_pieces;
244
239
 
245
240
  // Compress block-by-block independently.
246
241
  for (size_t pos = 0; pos < input_size; ) {
247
- size_t input_block_size = std::min(max_input_block_size, input_size - pos);
242
+ uint32_t input_block_size =
243
+ static_cast<uint32_t>(std::min(max_input_block_size, input_size - pos));
244
+ uint32_t prefix_size =
245
+ static_cast<uint32_t>(std::min(max_prefix_size, pos));
248
246
  size_t out_size = input_block_size + (input_block_size >> 3) + 1024;
249
247
  std::vector<uint8_t> out(out_size);
250
248
  if (!WriteMetaBlockParallel(params,
251
249
  input_block_size,
252
250
  &input_buffer[pos],
253
- pos,
254
- input_buffer,
251
+ prefix_size,
252
+ &input_buffer[pos - prefix_size],
255
253
  pos == 0,
256
254
  pos + input_block_size == input_size,
257
255
  &out_size,