brotli 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.gitmodules +3 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/README.md +36 -0
- data/Rakefile +13 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/brotli.gemspec +28 -0
- data/ext/brotli/brotli.cc +67 -0
- data/ext/brotli/brotli.h +9 -0
- data/ext/brotli/extconf.rb +34 -0
- data/lib/brotli.rb +2 -0
- data/lib/brotli/version.rb +3 -0
- data/vendor/brotli/LICENSE +202 -0
- data/vendor/brotli/dec/Makefile +12 -0
- data/vendor/brotli/dec/bit_reader.c +55 -0
- data/vendor/brotli/dec/bit_reader.h +256 -0
- data/vendor/brotli/dec/context.h +260 -0
- data/vendor/brotli/dec/decode.c +1573 -0
- data/vendor/brotli/dec/decode.h +160 -0
- data/vendor/brotli/dec/dictionary.h +9494 -0
- data/vendor/brotli/dec/huffman.c +325 -0
- data/vendor/brotli/dec/huffman.h +77 -0
- data/vendor/brotli/dec/port.h +148 -0
- data/vendor/brotli/dec/prefix.h +756 -0
- data/vendor/brotli/dec/state.c +149 -0
- data/vendor/brotli/dec/state.h +185 -0
- data/vendor/brotli/dec/streams.c +99 -0
- data/vendor/brotli/dec/streams.h +100 -0
- data/vendor/brotli/dec/transform.h +315 -0
- data/vendor/brotli/dec/types.h +36 -0
- data/vendor/brotli/enc/Makefile +11 -0
- data/vendor/brotli/enc/backward_references.cc +769 -0
- data/vendor/brotli/enc/backward_references.h +50 -0
- data/vendor/brotli/enc/bit_cost.h +147 -0
- data/vendor/brotli/enc/block_splitter.cc +418 -0
- data/vendor/brotli/enc/block_splitter.h +78 -0
- data/vendor/brotli/enc/brotli_bit_stream.cc +884 -0
- data/vendor/brotli/enc/brotli_bit_stream.h +149 -0
- data/vendor/brotli/enc/cluster.h +290 -0
- data/vendor/brotli/enc/command.h +140 -0
- data/vendor/brotli/enc/context.h +185 -0
- data/vendor/brotli/enc/dictionary.h +9485 -0
- data/vendor/brotli/enc/dictionary_hash.h +4125 -0
- data/vendor/brotli/enc/encode.cc +715 -0
- data/vendor/brotli/enc/encode.h +196 -0
- data/vendor/brotli/enc/encode_parallel.cc +354 -0
- data/vendor/brotli/enc/encode_parallel.h +37 -0
- data/vendor/brotli/enc/entropy_encode.cc +492 -0
- data/vendor/brotli/enc/entropy_encode.h +88 -0
- data/vendor/brotli/enc/fast_log.h +179 -0
- data/vendor/brotli/enc/find_match_length.h +87 -0
- data/vendor/brotli/enc/hash.h +686 -0
- data/vendor/brotli/enc/histogram.cc +76 -0
- data/vendor/brotli/enc/histogram.h +100 -0
- data/vendor/brotli/enc/literal_cost.cc +172 -0
- data/vendor/brotli/enc/literal_cost.h +38 -0
- data/vendor/brotli/enc/metablock.cc +544 -0
- data/vendor/brotli/enc/metablock.h +88 -0
- data/vendor/brotli/enc/port.h +151 -0
- data/vendor/brotli/enc/prefix.h +85 -0
- data/vendor/brotli/enc/ringbuffer.h +108 -0
- data/vendor/brotli/enc/static_dict.cc +441 -0
- data/vendor/brotli/enc/static_dict.h +40 -0
- data/vendor/brotli/enc/static_dict_lut.h +12063 -0
- data/vendor/brotli/enc/streams.cc +127 -0
- data/vendor/brotli/enc/streams.h +129 -0
- data/vendor/brotli/enc/transform.h +250 -0
- data/vendor/brotli/enc/write_bits.h +91 -0
- metadata +171 -0
@@ -0,0 +1,76 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Build per-context histograms of literals, commands and distance codes.
|
16
|
+
|
17
|
+
#include "./histogram.h"
|
18
|
+
|
19
|
+
#include <stdint.h>
|
20
|
+
#include <cmath>
|
21
|
+
|
22
|
+
#include "./block_splitter.h"
|
23
|
+
#include "./command.h"
|
24
|
+
#include "./context.h"
|
25
|
+
#include "./prefix.h"
|
26
|
+
|
27
|
+
namespace brotli {
|
28
|
+
|
29
|
+
void BuildHistograms(
|
30
|
+
const Command* cmds,
|
31
|
+
const size_t num_commands,
|
32
|
+
const BlockSplit& literal_split,
|
33
|
+
const BlockSplit& insert_and_copy_split,
|
34
|
+
const BlockSplit& dist_split,
|
35
|
+
const uint8_t* ringbuffer,
|
36
|
+
size_t start_pos,
|
37
|
+
size_t mask,
|
38
|
+
uint8_t prev_byte,
|
39
|
+
uint8_t prev_byte2,
|
40
|
+
const std::vector<int>& context_modes,
|
41
|
+
std::vector<HistogramLiteral>* literal_histograms,
|
42
|
+
std::vector<HistogramCommand>* insert_and_copy_histograms,
|
43
|
+
std::vector<HistogramDistance>* copy_dist_histograms) {
|
44
|
+
size_t pos = start_pos;
|
45
|
+
BlockSplitIterator literal_it(literal_split);
|
46
|
+
BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
|
47
|
+
BlockSplitIterator dist_it(dist_split);
|
48
|
+
for (int i = 0; i < num_commands; ++i) {
|
49
|
+
const Command &cmd = cmds[i];
|
50
|
+
insert_and_copy_it.Next();
|
51
|
+
(*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
|
52
|
+
cmd.cmd_prefix_);
|
53
|
+
for (int j = 0; j < cmd.insert_len_; ++j) {
|
54
|
+
literal_it.Next();
|
55
|
+
int context = (literal_it.type_ << kLiteralContextBits) +
|
56
|
+
Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
|
57
|
+
(*literal_histograms)[context].Add(ringbuffer[pos & mask]);
|
58
|
+
prev_byte2 = prev_byte;
|
59
|
+
prev_byte = ringbuffer[pos & mask];
|
60
|
+
++pos;
|
61
|
+
}
|
62
|
+
pos += cmd.copy_len_;
|
63
|
+
if (cmd.copy_len_ > 0) {
|
64
|
+
prev_byte2 = ringbuffer[(pos - 2) & mask];
|
65
|
+
prev_byte = ringbuffer[(pos - 1) & mask];
|
66
|
+
if (cmd.cmd_prefix_ >= 128) {
|
67
|
+
dist_it.Next();
|
68
|
+
int context = (dist_it.type_ << kDistanceContextBits) +
|
69
|
+
cmd.DistanceContext();
|
70
|
+
(*copy_dist_histograms)[context].Add(cmd.dist_prefix_);
|
71
|
+
}
|
72
|
+
}
|
73
|
+
}
|
74
|
+
}
|
75
|
+
|
76
|
+
} // namespace brotli
|
@@ -0,0 +1,100 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Models the histograms of literals, commands and distance codes.
|
16
|
+
|
17
|
+
#ifndef BROTLI_ENC_HISTOGRAM_H_
|
18
|
+
#define BROTLI_ENC_HISTOGRAM_H_
|
19
|
+
|
20
|
+
#include <stdint.h>
#include <string.h>
#include <limits>
#include <utility>
#include <vector>
#include "./command.h"
#include "./fast_log.h"
#include "./prefix.h"
|
27
|
+
|
28
|
+
namespace brotli {
|
29
|
+
|
30
|
+
class BlockSplit;
|
31
|
+
|
32
|
+
// A simple container for histograms of data in blocks.
//
// kDataSize is the alphabet size: data_[s] holds the number of occurrences of
// symbol s and total_count_ the sum over all symbols. No bounds checking is
// done on symbol values; callers must pass values in [0, kDataSize).
template<int kDataSize>
struct Histogram {
  Histogram() {
    Clear();
  }

  // Resets all counts to zero and marks the cached bit cost as "not computed".
  void Clear() {
    memset(data_, 0, sizeof(data_));
    total_count_ = 0;
    // Give bit_cost_ a defined value so that copying a freshly constructed or
    // cleared histogram never reads an indeterminate double.
    bit_cost_ = std::numeric_limits<double>::infinity();
  }

  // Counts one occurrence of symbol val.
  void Add(int val) {
    ++data_[val];
    ++total_count_;
  }

  // Removes one occurrence of symbol val.
  void Remove(int val) {
    --data_[val];
    --total_count_;
  }

  // Counts the n symbols stored at p[0 .. n).
  template<typename DataType>
  void Add(const DataType *p, size_t n) {
    total_count_ += static_cast<int>(n);
    for (size_t i = 0; i < n; ++i) {
      ++data_[p[i]];
    }
  }

  // Adds every count of v to this histogram. bit_cost_ is left untouched.
  void AddHistogram(const Histogram& v) {
    total_count_ += v.total_count_;
    for (int i = 0; i < kDataSize; ++i) {
      data_[i] += v.data_[i];
    }
  }

  // Per-symbol occurrence counts.
  int data_[kDataSize];
  // Sum of all entries of data_.
  int total_count_;
  // Slot for a bit cost assigned by callers; +infinity until one is stored.
  double bit_cost_;
};
|
67
|
+
|
68
|
+
// Literal histogram.
|
69
|
+
typedef Histogram<256> HistogramLiteral;
|
70
|
+
// Prefix histograms.
|
71
|
+
typedef Histogram<kNumCommandPrefixes> HistogramCommand;
|
72
|
+
typedef Histogram<kNumDistancePrefixes> HistogramDistance;
|
73
|
+
typedef Histogram<kNumBlockLenPrefixes> HistogramBlockLength;
|
74
|
+
// Context map histogram, 256 Huffman tree indexes + 16 run length codes.
|
75
|
+
typedef Histogram<272> HistogramContextMap;
|
76
|
+
// Block type histogram, 256 block types + 2 special symbols.
|
77
|
+
typedef Histogram<258> HistogramBlockType;
|
78
|
+
|
79
|
+
static const int kLiteralContextBits = 6;
|
80
|
+
static const int kDistanceContextBits = 2;
|
81
|
+
|
82
|
+
void BuildHistograms(
|
83
|
+
const Command* cmds,
|
84
|
+
const size_t num_commands,
|
85
|
+
const BlockSplit& literal_split,
|
86
|
+
const BlockSplit& insert_and_copy_split,
|
87
|
+
const BlockSplit& dist_split,
|
88
|
+
const uint8_t* ringbuffer,
|
89
|
+
size_t pos,
|
90
|
+
size_t mask,
|
91
|
+
uint8_t prev_byte,
|
92
|
+
uint8_t prev_byte2,
|
93
|
+
const std::vector<int>& context_modes,
|
94
|
+
std::vector<HistogramLiteral>* literal_histograms,
|
95
|
+
std::vector<HistogramCommand>* insert_and_copy_histograms,
|
96
|
+
std::vector<HistogramDistance>* copy_dist_histograms);
|
97
|
+
|
98
|
+
} // namespace brotli
|
99
|
+
|
100
|
+
#endif // BROTLI_ENC_HISTOGRAM_H_
|
@@ -0,0 +1,172 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Literal cost model to allow backward reference replacement to be efficient.
|
16
|
+
|
17
|
+
#include "./literal_cost.h"
|
18
|
+
|
19
|
+
#include <math.h>
|
20
|
+
#include <stdint.h>
|
21
|
+
#include <algorithm>
|
22
|
+
|
23
|
+
#include "./fast_log.h"
|
24
|
+
|
25
|
+
namespace brotli {
|
26
|
+
|
27
|
+
// Returns the position (0, 1 or 2) inside a UTF-8 sequence of the byte that
// follows c, given that `last` was the byte before c. The result never
// exceeds `clamp`.
static int UTF8Position(int last, int c, int clamp) {
  // Single-byte character: the next one is the 'Byte 1' again.
  if (c < 128) {
    return 0;
  }
  // Lead byte: the next one is the 'Byte 2' of the utf-8 encoding.
  if (c >= 192) {
    return std::min(1, clamp);
  }
  // c is in [128, 192). Decide over the last byte whether this ends the
  // sequence: below 0xe0 a two or three byte coding is completed, otherwise
  // the next one is the 'Byte 3' of the utf-8 encoding.
  return (last < 0xe0) ? 0 : std::min(2, clamp);
}
|
41
|
+
|
42
|
+
static int DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
|
43
|
+
const uint8_t *data) {
|
44
|
+
int counts[3] = { 0 };
|
45
|
+
int max_utf8 = 1; // should be 2, but 1 compresses better.
|
46
|
+
int last_c = 0;
|
47
|
+
int utf8_pos = 0;
|
48
|
+
for (int i = 0; i < len; ++i) {
|
49
|
+
int c = data[(pos + i) & mask];
|
50
|
+
utf8_pos = UTF8Position(last_c, c, 2);
|
51
|
+
++counts[utf8_pos];
|
52
|
+
last_c = c;
|
53
|
+
}
|
54
|
+
if (counts[2] < 500) {
|
55
|
+
max_utf8 = 1;
|
56
|
+
}
|
57
|
+
if (counts[1] + counts[2] < 25) {
|
58
|
+
max_utf8 = 0;
|
59
|
+
}
|
60
|
+
return max_utf8;
|
61
|
+
}
|
62
|
+
|
63
|
+
void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
64
|
+
size_t cost_mask, const uint8_t *data,
|
65
|
+
float *cost) {
|
66
|
+
|
67
|
+
// max_utf8 is 0 (normal ascii single byte modeling),
|
68
|
+
// 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling).
|
69
|
+
const int max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
|
70
|
+
int histogram[3][256] = { { 0 } };
|
71
|
+
int window_half = 495;
|
72
|
+
int in_window = std::min(static_cast<size_t>(window_half), len);
|
73
|
+
int in_window_utf8[3] = { 0 };
|
74
|
+
|
75
|
+
// Bootstrap histograms.
|
76
|
+
int last_c = 0;
|
77
|
+
int utf8_pos = 0;
|
78
|
+
for (int i = 0; i < in_window; ++i) {
|
79
|
+
int c = data[(pos + i) & mask];
|
80
|
+
++histogram[utf8_pos][c];
|
81
|
+
++in_window_utf8[utf8_pos];
|
82
|
+
utf8_pos = UTF8Position(last_c, c, max_utf8);
|
83
|
+
last_c = c;
|
84
|
+
}
|
85
|
+
|
86
|
+
// Compute bit costs with sliding window.
|
87
|
+
for (int i = 0; i < len; ++i) {
|
88
|
+
if (i - window_half >= 0) {
|
89
|
+
// Remove a byte in the past.
|
90
|
+
int c = (i - window_half - 1) < 0 ?
|
91
|
+
0 : data[(pos + i - window_half - 1) & mask];
|
92
|
+
int last_c = (i - window_half - 2) < 0 ?
|
93
|
+
0 : data[(pos + i - window_half - 2) & mask];
|
94
|
+
int utf8_pos2 = UTF8Position(last_c, c, max_utf8);
|
95
|
+
--histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
|
96
|
+
--in_window_utf8[utf8_pos2];
|
97
|
+
}
|
98
|
+
if (i + window_half < len) {
|
99
|
+
// Add a byte in the future.
|
100
|
+
int c = (i + window_half - 1) < 0 ?
|
101
|
+
0 : data[(pos + i + window_half - 1) & mask];
|
102
|
+
int last_c = (i + window_half - 2) < 0 ?
|
103
|
+
0 : data[(pos + i + window_half - 2) & mask];
|
104
|
+
int utf8_pos2 = UTF8Position(last_c, c, max_utf8);
|
105
|
+
++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
|
106
|
+
++in_window_utf8[utf8_pos2];
|
107
|
+
}
|
108
|
+
int c = i < 1 ? 0 : data[(pos + i - 1) & mask];
|
109
|
+
int last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
|
110
|
+
int utf8_pos = UTF8Position(last_c, c, max_utf8);
|
111
|
+
int masked_pos = (pos + i) & mask;
|
112
|
+
int histo = histogram[utf8_pos][data[masked_pos]];
|
113
|
+
if (histo == 0) {
|
114
|
+
histo = 1;
|
115
|
+
}
|
116
|
+
float lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
|
117
|
+
lit_cost += 0.02905;
|
118
|
+
if (lit_cost < 1.0) {
|
119
|
+
lit_cost *= 0.5;
|
120
|
+
lit_cost += 0.5;
|
121
|
+
}
|
122
|
+
// Make the first bytes more expensive -- seems to help, not sure why.
|
123
|
+
// Perhaps because the entropy source is changing its properties
|
124
|
+
// rapidly in the beginning of the file, perhaps because the beginning
|
125
|
+
// of the data is a statistical "anomaly".
|
126
|
+
if (i < 2000) {
|
127
|
+
lit_cost += 0.7 - ((2000 - i) / 2000.0 * 0.35);
|
128
|
+
}
|
129
|
+
cost[(pos + i) & cost_mask] = lit_cost;
|
130
|
+
}
|
131
|
+
}
|
132
|
+
|
133
|
+
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
134
|
+
size_t cost_mask, const uint8_t *data,
|
135
|
+
float *cost) {
|
136
|
+
int histogram[256] = { 0 };
|
137
|
+
int window_half = 2000;
|
138
|
+
int in_window = std::min(static_cast<size_t>(window_half), len);
|
139
|
+
|
140
|
+
// Bootstrap histogram.
|
141
|
+
for (int i = 0; i < in_window; ++i) {
|
142
|
+
++histogram[data[(pos + i) & mask]];
|
143
|
+
}
|
144
|
+
|
145
|
+
// Compute bit costs with sliding window.
|
146
|
+
for (int i = 0; i < len; ++i) {
|
147
|
+
if (i - window_half >= 0) {
|
148
|
+
// Remove a byte in the past.
|
149
|
+
--histogram[data[(pos + i - window_half) & mask]];
|
150
|
+
--in_window;
|
151
|
+
}
|
152
|
+
if (i + window_half < len) {
|
153
|
+
// Add a byte in the future.
|
154
|
+
++histogram[data[(pos + i + window_half) & mask]];
|
155
|
+
++in_window;
|
156
|
+
}
|
157
|
+
int histo = histogram[data[(pos + i) & mask]];
|
158
|
+
if (histo == 0) {
|
159
|
+
histo = 1;
|
160
|
+
}
|
161
|
+
float lit_cost = FastLog2(in_window) - FastLog2(histo);
|
162
|
+
lit_cost += 0.029;
|
163
|
+
if (lit_cost < 1.0) {
|
164
|
+
lit_cost *= 0.5;
|
165
|
+
lit_cost += 0.5;
|
166
|
+
}
|
167
|
+
cost[(pos + i) & cost_mask] = lit_cost;
|
168
|
+
}
|
169
|
+
}
|
170
|
+
|
171
|
+
|
172
|
+
} // namespace brotli
|
@@ -0,0 +1,38 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Literal cost model to allow backward reference replacement to be efficient.
|
16
|
+
|
17
|
+
#ifndef BROTLI_ENC_LITERAL_COST_H_
|
18
|
+
#define BROTLI_ENC_LITERAL_COST_H_
|
19
|
+
|
20
|
+
#include <stddef.h>
|
21
|
+
#include <stdint.h>
|
22
|
+
|
23
|
+
namespace brotli {
|
24
|
+
|
25
|
+
// Estimates how many bits the literals in the interval [pos, pos + len) in the
|
26
|
+
// ringbuffer (data, mask) will take entropy coded and writes these estimates
|
27
|
+
// to the ringbuffer (cost, cost_mask).
|
28
|
+
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
29
|
+
size_t cost_mask, const uint8_t *data,
|
30
|
+
float *cost);
|
31
|
+
|
32
|
+
void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
33
|
+
size_t cost_mask, const uint8_t *data,
|
34
|
+
float *cost);
|
35
|
+
|
36
|
+
} // namespace brotli
|
37
|
+
|
38
|
+
#endif // BROTLI_ENC_LITERAL_COST_H_
|
@@ -0,0 +1,544 @@
|
|
1
|
+
// Copyright 2015 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Algorithms for distributing the literals and commands of a metablock between
|
16
|
+
// block types and contexts.
|
17
|
+
|
18
|
+
#include "./metablock.h"
|
19
|
+
|
20
|
+
#include "./block_splitter.h"
|
21
|
+
#include "./context.h"
|
22
|
+
#include "./cluster.h"
|
23
|
+
#include "./histogram.h"
|
24
|
+
|
25
|
+
namespace brotli {
|
26
|
+
|
27
|
+
void BuildMetaBlock(const uint8_t* ringbuffer,
|
28
|
+
const size_t pos,
|
29
|
+
const size_t mask,
|
30
|
+
uint8_t prev_byte,
|
31
|
+
uint8_t prev_byte2,
|
32
|
+
const Command* cmds,
|
33
|
+
size_t num_commands,
|
34
|
+
int literal_context_mode,
|
35
|
+
MetaBlockSplit* mb) {
|
36
|
+
SplitBlock(cmds, num_commands,
|
37
|
+
ringbuffer, pos, mask,
|
38
|
+
&mb->literal_split,
|
39
|
+
&mb->command_split,
|
40
|
+
&mb->distance_split);
|
41
|
+
|
42
|
+
std::vector<int> literal_context_modes(mb->literal_split.num_types,
|
43
|
+
literal_context_mode);
|
44
|
+
|
45
|
+
int num_literal_contexts =
|
46
|
+
mb->literal_split.num_types << kLiteralContextBits;
|
47
|
+
int num_distance_contexts =
|
48
|
+
mb->distance_split.num_types << kDistanceContextBits;
|
49
|
+
std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
|
50
|
+
mb->command_histograms.resize(mb->command_split.num_types);
|
51
|
+
std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
|
52
|
+
BuildHistograms(cmds, num_commands,
|
53
|
+
mb->literal_split,
|
54
|
+
mb->command_split,
|
55
|
+
mb->distance_split,
|
56
|
+
ringbuffer,
|
57
|
+
pos,
|
58
|
+
mask,
|
59
|
+
prev_byte,
|
60
|
+
prev_byte2,
|
61
|
+
literal_context_modes,
|
62
|
+
&literal_histograms,
|
63
|
+
&mb->command_histograms,
|
64
|
+
&distance_histograms);
|
65
|
+
|
66
|
+
// Histogram ids need to fit in one byte.
|
67
|
+
static const int kMaxNumberOfHistograms = 256;
|
68
|
+
|
69
|
+
mb->literal_histograms = literal_histograms;
|
70
|
+
ClusterHistograms(literal_histograms,
|
71
|
+
1 << kLiteralContextBits,
|
72
|
+
mb->literal_split.num_types,
|
73
|
+
kMaxNumberOfHistograms,
|
74
|
+
&mb->literal_histograms,
|
75
|
+
&mb->literal_context_map);
|
76
|
+
|
77
|
+
mb->distance_histograms = distance_histograms;
|
78
|
+
ClusterHistograms(distance_histograms,
|
79
|
+
1 << kDistanceContextBits,
|
80
|
+
mb->distance_split.num_types,
|
81
|
+
kMaxNumberOfHistograms,
|
82
|
+
&mb->distance_histograms,
|
83
|
+
&mb->distance_context_map);
|
84
|
+
}
|
85
|
+
|
86
|
+
// Greedy block splitter for one block category (literal, command or distance).
|
87
|
+
template<typename HistogramType>
|
88
|
+
class BlockSplitter {
|
89
|
+
public:
|
90
|
+
BlockSplitter(int alphabet_size,
|
91
|
+
int min_block_size,
|
92
|
+
double split_threshold,
|
93
|
+
int num_symbols,
|
94
|
+
BlockSplit* split,
|
95
|
+
std::vector<HistogramType>* histograms)
|
96
|
+
: alphabet_size_(alphabet_size),
|
97
|
+
min_block_size_(min_block_size),
|
98
|
+
split_threshold_(split_threshold),
|
99
|
+
num_blocks_(0),
|
100
|
+
split_(split),
|
101
|
+
histograms_(histograms),
|
102
|
+
target_block_size_(min_block_size),
|
103
|
+
block_size_(0),
|
104
|
+
curr_histogram_ix_(0),
|
105
|
+
merge_last_count_(0) {
|
106
|
+
int max_num_blocks = num_symbols / min_block_size + 1;
|
107
|
+
// We have to allocate one more histogram than the maximum number of block
|
108
|
+
// types for the current histogram when the meta-block is too big.
|
109
|
+
int max_num_types = std::min(max_num_blocks, kMaxBlockTypes + 1);
|
110
|
+
split_->lengths.resize(max_num_blocks);
|
111
|
+
split_->types.resize(max_num_blocks);
|
112
|
+
histograms_->resize(max_num_types);
|
113
|
+
last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
|
114
|
+
}
|
115
|
+
|
116
|
+
// Adds the next symbol to the current histogram. When the current histogram
|
117
|
+
// reaches the target size, decides on merging the block.
|
118
|
+
void AddSymbol(int symbol) {
|
119
|
+
(*histograms_)[curr_histogram_ix_].Add(symbol);
|
120
|
+
++block_size_;
|
121
|
+
if (block_size_ == target_block_size_) {
|
122
|
+
FinishBlock(/* is_final = */ false);
|
123
|
+
}
|
124
|
+
}
|
125
|
+
|
126
|
+
// Does either of three things:
|
127
|
+
// (1) emits the current block with a new block type;
|
128
|
+
// (2) emits the current block with the type of the second last block;
|
129
|
+
// (3) merges the current block with the last block.
|
130
|
+
void FinishBlock(bool is_final) {
|
131
|
+
if (block_size_ < min_block_size_) {
|
132
|
+
block_size_ = min_block_size_;
|
133
|
+
}
|
134
|
+
if (num_blocks_ == 0) {
|
135
|
+
// Create first block.
|
136
|
+
split_->lengths[0] = block_size_;
|
137
|
+
split_->types[0] = 0;
|
138
|
+
last_entropy_[0] =
|
139
|
+
BitsEntropy(&(*histograms_)[0].data_[0], alphabet_size_);
|
140
|
+
last_entropy_[1] = last_entropy_[0];
|
141
|
+
++num_blocks_;
|
142
|
+
++split_->num_types;
|
143
|
+
++curr_histogram_ix_;
|
144
|
+
block_size_ = 0;
|
145
|
+
} else if (block_size_ > 0) {
|
146
|
+
double entropy = BitsEntropy(&(*histograms_)[curr_histogram_ix_].data_[0],
|
147
|
+
alphabet_size_);
|
148
|
+
HistogramType combined_histo[2];
|
149
|
+
double combined_entropy[2];
|
150
|
+
double diff[2];
|
151
|
+
for (int j = 0; j < 2; ++j) {
|
152
|
+
int last_histogram_ix = last_histogram_ix_[j];
|
153
|
+
combined_histo[j] = (*histograms_)[curr_histogram_ix_];
|
154
|
+
combined_histo[j].AddHistogram((*histograms_)[last_histogram_ix]);
|
155
|
+
combined_entropy[j] = BitsEntropy(
|
156
|
+
&combined_histo[j].data_[0], alphabet_size_);
|
157
|
+
diff[j] = combined_entropy[j] - entropy - last_entropy_[j];
|
158
|
+
}
|
159
|
+
|
160
|
+
if (split_->num_types < kMaxBlockTypes &&
|
161
|
+
diff[0] > split_threshold_ &&
|
162
|
+
diff[1] > split_threshold_) {
|
163
|
+
// Create new block.
|
164
|
+
split_->lengths[num_blocks_] = block_size_;
|
165
|
+
split_->types[num_blocks_] = split_->num_types;
|
166
|
+
last_histogram_ix_[1] = last_histogram_ix_[0];
|
167
|
+
last_histogram_ix_[0] = split_->num_types;
|
168
|
+
last_entropy_[1] = last_entropy_[0];
|
169
|
+
last_entropy_[0] = entropy;
|
170
|
+
++num_blocks_;
|
171
|
+
++split_->num_types;
|
172
|
+
++curr_histogram_ix_;
|
173
|
+
block_size_ = 0;
|
174
|
+
merge_last_count_ = 0;
|
175
|
+
target_block_size_ = min_block_size_;
|
176
|
+
} else if (diff[1] < diff[0] - 20.0) {
|
177
|
+
// Combine this block with second last block.
|
178
|
+
split_->lengths[num_blocks_] = block_size_;
|
179
|
+
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
|
180
|
+
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
|
181
|
+
(*histograms_)[last_histogram_ix_[0]] = combined_histo[1];
|
182
|
+
last_entropy_[1] = last_entropy_[0];
|
183
|
+
last_entropy_[0] = combined_entropy[1];
|
184
|
+
++num_blocks_;
|
185
|
+
block_size_ = 0;
|
186
|
+
(*histograms_)[curr_histogram_ix_].Clear();
|
187
|
+
merge_last_count_ = 0;
|
188
|
+
target_block_size_ = min_block_size_;
|
189
|
+
} else {
|
190
|
+
// Combine this block with last block.
|
191
|
+
split_->lengths[num_blocks_ - 1] += block_size_;
|
192
|
+
(*histograms_)[last_histogram_ix_[0]] = combined_histo[0];
|
193
|
+
last_entropy_[0] = combined_entropy[0];
|
194
|
+
if (split_->num_types == 1) {
|
195
|
+
last_entropy_[1] = last_entropy_[0];
|
196
|
+
}
|
197
|
+
block_size_ = 0;
|
198
|
+
(*histograms_)[curr_histogram_ix_].Clear();
|
199
|
+
if (++merge_last_count_ > 1) {
|
200
|
+
target_block_size_ += min_block_size_;
|
201
|
+
}
|
202
|
+
}
|
203
|
+
}
|
204
|
+
if (is_final) {
|
205
|
+
(*histograms_).resize(split_->num_types);
|
206
|
+
split_->types.resize(num_blocks_);
|
207
|
+
split_->lengths.resize(num_blocks_);
|
208
|
+
}
|
209
|
+
}
|
210
|
+
|
211
|
+
private:
|
212
|
+
static const int kMaxBlockTypes = 256;
|
213
|
+
|
214
|
+
// Alphabet size of particular block category.
|
215
|
+
const int alphabet_size_;
|
216
|
+
// We collect at least this many symbols for each block.
|
217
|
+
const int min_block_size_;
|
218
|
+
// We merge histograms A and B if
|
219
|
+
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
|
220
|
+
// where A is the current histogram and B is the histogram of the last or the
|
221
|
+
// second last block type.
|
222
|
+
const double split_threshold_;
|
223
|
+
|
224
|
+
int num_blocks_;
|
225
|
+
BlockSplit* split_; // not owned
|
226
|
+
std::vector<HistogramType>* histograms_; // not owned
|
227
|
+
|
228
|
+
// The number of symbols that we want to collect before deciding on whether
|
229
|
+
// or not to merge the block with a previous one or emit a new block.
|
230
|
+
int target_block_size_;
|
231
|
+
// The number of symbols in the current histogram.
|
232
|
+
int block_size_;
|
233
|
+
// Offset of the current histogram.
|
234
|
+
int curr_histogram_ix_;
|
235
|
+
// Offset of the histograms of the previous two block types.
|
236
|
+
int last_histogram_ix_[2];
|
237
|
+
// Entropy of the previous two block types.
|
238
|
+
double last_entropy_[2];
|
239
|
+
// The number of times we merged the current block with the last one.
|
240
|
+
int merge_last_count_;
|
241
|
+
};
|
242
|
+
|
243
|
+
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
|
244
|
+
size_t pos,
|
245
|
+
size_t mask,
|
246
|
+
const Command *commands,
|
247
|
+
size_t n_commands,
|
248
|
+
MetaBlockSplit* mb) {
|
249
|
+
int num_literals = 0;
|
250
|
+
for (int i = 0; i < n_commands; ++i) {
|
251
|
+
num_literals += commands[i].insert_len_;
|
252
|
+
}
|
253
|
+
|
254
|
+
BlockSplitter<HistogramLiteral> lit_blocks(
|
255
|
+
256, 512, 400.0, num_literals,
|
256
|
+
&mb->literal_split, &mb->literal_histograms);
|
257
|
+
BlockSplitter<HistogramCommand> cmd_blocks(
|
258
|
+
kNumCommandPrefixes, 1024, 500.0, n_commands,
|
259
|
+
&mb->command_split, &mb->command_histograms);
|
260
|
+
BlockSplitter<HistogramDistance> dist_blocks(
|
261
|
+
64, 512, 100.0, n_commands,
|
262
|
+
&mb->distance_split, &mb->distance_histograms);
|
263
|
+
|
264
|
+
for (int i = 0; i < n_commands; ++i) {
|
265
|
+
const Command cmd = commands[i];
|
266
|
+
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
|
267
|
+
for (int j = 0; j < cmd.insert_len_; ++j) {
|
268
|
+
lit_blocks.AddSymbol(ringbuffer[pos & mask]);
|
269
|
+
++pos;
|
270
|
+
}
|
271
|
+
pos += cmd.copy_len_;
|
272
|
+
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
|
273
|
+
dist_blocks.AddSymbol(cmd.dist_prefix_);
|
274
|
+
}
|
275
|
+
}
|
276
|
+
|
277
|
+
lit_blocks.FinishBlock(/* is_final = */ true);
|
278
|
+
cmd_blocks.FinishBlock(/* is_final = */ true);
|
279
|
+
dist_blocks.FinishBlock(/* is_final = */ true);
|
280
|
+
}
|
281
|
+
|
282
|
+
// Greedy block splitter for one block category (literal, command or distance).
|
283
|
+
// Gathers histograms for all context buckets.
|
284
|
+
template<typename HistogramType>
|
285
|
+
class ContextBlockSplitter {
|
286
|
+
public:
|
287
|
+
ContextBlockSplitter(int alphabet_size,
|
288
|
+
int num_contexts,
|
289
|
+
int min_block_size,
|
290
|
+
double split_threshold,
|
291
|
+
int num_symbols,
|
292
|
+
BlockSplit* split,
|
293
|
+
std::vector<HistogramType>* histograms)
|
294
|
+
: alphabet_size_(alphabet_size),
|
295
|
+
num_contexts_(num_contexts),
|
296
|
+
max_block_types_(kMaxBlockTypes / num_contexts),
|
297
|
+
min_block_size_(min_block_size),
|
298
|
+
split_threshold_(split_threshold),
|
299
|
+
num_blocks_(0),
|
300
|
+
split_(split),
|
301
|
+
histograms_(histograms),
|
302
|
+
target_block_size_(min_block_size),
|
303
|
+
block_size_(0),
|
304
|
+
curr_histogram_ix_(0),
|
305
|
+
last_entropy_(2 * num_contexts),
|
306
|
+
merge_last_count_(0) {
|
307
|
+
int max_num_blocks = num_symbols / min_block_size + 1;
|
308
|
+
// We have to allocate one more histogram than the maximum number of block
|
309
|
+
// types for the current histogram when the meta-block is too big.
|
310
|
+
int max_num_types = std::min(max_num_blocks, max_block_types_ + 1);
|
311
|
+
split_->lengths.resize(max_num_blocks);
|
312
|
+
split_->types.resize(max_num_blocks);
|
313
|
+
histograms_->resize(max_num_types * num_contexts);
|
314
|
+
last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
|
315
|
+
}
|
316
|
+
|
317
|
+
// Adds the next symbol to the current block type and context. When the
|
318
|
+
// current block reaches the target size, decides on merging the block.
|
319
|
+
void AddSymbol(int symbol, int context) {
|
320
|
+
(*histograms_)[curr_histogram_ix_ + context].Add(symbol);
|
321
|
+
++block_size_;
|
322
|
+
if (block_size_ == target_block_size_) {
|
323
|
+
FinishBlock(/* is_final = */ false);
|
324
|
+
}
|
325
|
+
}
|
326
|
+
|
327
|
+
// Does either of three things:
|
328
|
+
// (1) emits the current block with a new block type;
|
329
|
+
// (2) emits the current block with the type of the second last block;
|
330
|
+
// (3) merges the current block with the last block.
|
331
|
+
void FinishBlock(bool is_final) {
|
332
|
+
if (block_size_ < min_block_size_) {
|
333
|
+
block_size_ = min_block_size_;
|
334
|
+
}
|
335
|
+
if (num_blocks_ == 0) {
|
336
|
+
// Create first block.
|
337
|
+
split_->lengths[0] = block_size_;
|
338
|
+
split_->types[0] = 0;
|
339
|
+
for (int i = 0; i < num_contexts_; ++i) {
|
340
|
+
last_entropy_[i] =
|
341
|
+
BitsEntropy(&(*histograms_)[i].data_[0], alphabet_size_);
|
342
|
+
last_entropy_[num_contexts_ + i] = last_entropy_[i];
|
343
|
+
}
|
344
|
+
++num_blocks_;
|
345
|
+
++split_->num_types;
|
346
|
+
curr_histogram_ix_ += num_contexts_;
|
347
|
+
block_size_ = 0;
|
348
|
+
} else if (block_size_ > 0) {
|
349
|
+
// Try merging the set of histograms for the current block type with the
|
350
|
+
// respective set of histograms for the last and second last block types.
|
351
|
+
// Decide over the split based on the total reduction of entropy across
|
352
|
+
// all contexts.
|
353
|
+
std::vector<double> entropy(num_contexts_);
|
354
|
+
std::vector<HistogramType> combined_histo(2 * num_contexts_);
|
355
|
+
std::vector<double> combined_entropy(2 * num_contexts_);
|
356
|
+
double diff[2] = { 0.0 };
|
357
|
+
for (int i = 0; i < num_contexts_; ++i) {
|
358
|
+
int curr_histo_ix = curr_histogram_ix_ + i;
|
359
|
+
entropy[i] = BitsEntropy(&(*histograms_)[curr_histo_ix].data_[0],
|
360
|
+
alphabet_size_);
|
361
|
+
for (int j = 0; j < 2; ++j) {
|
362
|
+
int jx = j * num_contexts_ + i;
|
363
|
+
int last_histogram_ix = last_histogram_ix_[j] + i;
|
364
|
+
combined_histo[jx] = (*histograms_)[curr_histo_ix];
|
365
|
+
combined_histo[jx].AddHistogram((*histograms_)[last_histogram_ix]);
|
366
|
+
combined_entropy[jx] = BitsEntropy(
|
367
|
+
&combined_histo[jx].data_[0], alphabet_size_);
|
368
|
+
diff[j] += combined_entropy[jx] - entropy[i] - last_entropy_[jx];
|
369
|
+
}
|
370
|
+
}
|
371
|
+
|
372
|
+
if (split_->num_types < max_block_types_ &&
|
373
|
+
diff[0] > split_threshold_ &&
|
374
|
+
diff[1] > split_threshold_) {
|
375
|
+
// Create new block.
|
376
|
+
split_->lengths[num_blocks_] = block_size_;
|
377
|
+
split_->types[num_blocks_] = split_->num_types;
|
378
|
+
last_histogram_ix_[1] = last_histogram_ix_[0];
|
379
|
+
last_histogram_ix_[0] = split_->num_types * num_contexts_;
|
380
|
+
for (int i = 0; i < num_contexts_; ++i) {
|
381
|
+
last_entropy_[num_contexts_ + i] = last_entropy_[i];
|
382
|
+
last_entropy_[i] = entropy[i];
|
383
|
+
}
|
384
|
+
++num_blocks_;
|
385
|
+
++split_->num_types;
|
386
|
+
curr_histogram_ix_ += num_contexts_;
|
387
|
+
block_size_ = 0;
|
388
|
+
merge_last_count_ = 0;
|
389
|
+
target_block_size_ = min_block_size_;
|
390
|
+
} else if (diff[1] < diff[0] - 20.0) {
|
391
|
+
// Combine this block with second last block.
|
392
|
+
split_->lengths[num_blocks_] = block_size_;
|
393
|
+
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
|
394
|
+
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
|
395
|
+
for (int i = 0; i < num_contexts_; ++i) {
|
396
|
+
(*histograms_)[last_histogram_ix_[0] + i] =
|
397
|
+
combined_histo[num_contexts_ + i];
|
398
|
+
last_entropy_[num_contexts_ + i] = last_entropy_[i];
|
399
|
+
last_entropy_[i] = combined_entropy[num_contexts_ + i];
|
400
|
+
(*histograms_)[curr_histogram_ix_ + i].Clear();
|
401
|
+
}
|
402
|
+
++num_blocks_;
|
403
|
+
block_size_ = 0;
|
404
|
+
merge_last_count_ = 0;
|
405
|
+
target_block_size_ = min_block_size_;
|
406
|
+
} else {
|
407
|
+
// Combine this block with last block.
|
408
|
+
split_->lengths[num_blocks_ - 1] += block_size_;
|
409
|
+
for (int i = 0; i < num_contexts_; ++i) {
|
410
|
+
(*histograms_)[last_histogram_ix_[0] + i] = combined_histo[i];
|
411
|
+
last_entropy_[i] = combined_entropy[i];
|
412
|
+
if (split_->num_types == 1) {
|
413
|
+
last_entropy_[num_contexts_ + i] = last_entropy_[i];
|
414
|
+
}
|
415
|
+
(*histograms_)[curr_histogram_ix_ + i].Clear();
|
416
|
+
}
|
417
|
+
block_size_ = 0;
|
418
|
+
if (++merge_last_count_ > 1) {
|
419
|
+
target_block_size_ += min_block_size_;
|
420
|
+
}
|
421
|
+
}
|
422
|
+
}
|
423
|
+
if (is_final) {
|
424
|
+
(*histograms_).resize(split_->num_types * num_contexts_);
|
425
|
+
split_->types.resize(num_blocks_);
|
426
|
+
split_->lengths.resize(num_blocks_);
|
427
|
+
}
|
428
|
+
}
|
429
|
+
|
430
|
+
private:
|
431
|
+
static const int kMaxBlockTypes = 256;
|
432
|
+
|
433
|
+
// Alphabet size of particular block category.
|
434
|
+
const int alphabet_size_;
|
435
|
+
const int num_contexts_;
|
436
|
+
const int max_block_types_;
|
437
|
+
// We collect at least this many symbols for each block.
|
438
|
+
const int min_block_size_;
|
439
|
+
// We merge histograms A and B if
|
440
|
+
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
|
441
|
+
// where A is the current histogram and B is the histogram of the last or the
|
442
|
+
// second last block type.
|
443
|
+
const double split_threshold_;
|
444
|
+
|
445
|
+
int num_blocks_;
|
446
|
+
BlockSplit* split_; // not owned
|
447
|
+
std::vector<HistogramType>* histograms_; // not owned
|
448
|
+
|
449
|
+
// The number of symbols that we want to collect before deciding on whether
|
450
|
+
// or not to merge the block with a previous one or emit a new block.
|
451
|
+
int target_block_size_;
|
452
|
+
// The number of symbols in the current histogram.
|
453
|
+
int block_size_;
|
454
|
+
// Offset of the current histogram.
|
455
|
+
int curr_histogram_ix_;
|
456
|
+
// Offset of the histograms of the previous two block types.
|
457
|
+
int last_histogram_ix_[2];
|
458
|
+
// Entropy of the previous two block types.
|
459
|
+
std::vector<double> last_entropy_;
|
460
|
+
// The number of times we merged the current block with the last one.
|
461
|
+
int merge_last_count_;
|
462
|
+
};
|
463
|
+
|
464
|
+
void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
|
465
|
+
size_t pos,
|
466
|
+
size_t mask,
|
467
|
+
uint8_t prev_byte,
|
468
|
+
uint8_t prev_byte2,
|
469
|
+
int literal_context_mode,
|
470
|
+
int num_contexts,
|
471
|
+
const int* static_context_map,
|
472
|
+
const Command *commands,
|
473
|
+
size_t n_commands,
|
474
|
+
MetaBlockSplit* mb) {
|
475
|
+
int num_literals = 0;
|
476
|
+
for (int i = 0; i < n_commands; ++i) {
|
477
|
+
num_literals += commands[i].insert_len_;
|
478
|
+
}
|
479
|
+
|
480
|
+
ContextBlockSplitter<HistogramLiteral> lit_blocks(
|
481
|
+
256, num_contexts, 512, 400.0, num_literals,
|
482
|
+
&mb->literal_split, &mb->literal_histograms);
|
483
|
+
BlockSplitter<HistogramCommand> cmd_blocks(
|
484
|
+
kNumCommandPrefixes, 1024, 500.0, n_commands,
|
485
|
+
&mb->command_split, &mb->command_histograms);
|
486
|
+
BlockSplitter<HistogramDistance> dist_blocks(
|
487
|
+
64, 512, 100.0, n_commands,
|
488
|
+
&mb->distance_split, &mb->distance_histograms);
|
489
|
+
|
490
|
+
for (int i = 0; i < n_commands; ++i) {
|
491
|
+
const Command cmd = commands[i];
|
492
|
+
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
|
493
|
+
for (int j = 0; j < cmd.insert_len_; ++j) {
|
494
|
+
int context = Context(prev_byte, prev_byte2, literal_context_mode);
|
495
|
+
uint8_t literal = ringbuffer[pos & mask];
|
496
|
+
lit_blocks.AddSymbol(literal, static_context_map[context]);
|
497
|
+
prev_byte2 = prev_byte;
|
498
|
+
prev_byte = literal;
|
499
|
+
++pos;
|
500
|
+
}
|
501
|
+
pos += cmd.copy_len_;
|
502
|
+
if (cmd.copy_len_ > 0) {
|
503
|
+
prev_byte2 = ringbuffer[(pos - 2) & mask];
|
504
|
+
prev_byte = ringbuffer[(pos - 1) & mask];
|
505
|
+
if (cmd.cmd_prefix_ >= 128) {
|
506
|
+
dist_blocks.AddSymbol(cmd.dist_prefix_);
|
507
|
+
}
|
508
|
+
}
|
509
|
+
}
|
510
|
+
|
511
|
+
lit_blocks.FinishBlock(/* is_final = */ true);
|
512
|
+
cmd_blocks.FinishBlock(/* is_final = */ true);
|
513
|
+
dist_blocks.FinishBlock(/* is_final = */ true);
|
514
|
+
|
515
|
+
mb->literal_context_map.resize(
|
516
|
+
mb->literal_split.num_types << kLiteralContextBits);
|
517
|
+
for (int i = 0; i < mb->literal_split.num_types; ++i) {
|
518
|
+
for (int j = 0; j < (1 << kLiteralContextBits); ++j) {
|
519
|
+
mb->literal_context_map[(i << kLiteralContextBits) + j] =
|
520
|
+
i * num_contexts + static_context_map[j];
|
521
|
+
}
|
522
|
+
}
|
523
|
+
}
|
524
|
+
|
525
|
+
void OptimizeHistograms(int num_direct_distance_codes,
|
526
|
+
int distance_postfix_bits,
|
527
|
+
MetaBlockSplit* mb) {
|
528
|
+
for (int i = 0; i < mb->literal_histograms.size(); ++i) {
|
529
|
+
OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0]);
|
530
|
+
}
|
531
|
+
for (int i = 0; i < mb->command_histograms.size(); ++i) {
|
532
|
+
OptimizeHuffmanCountsForRle(kNumCommandPrefixes,
|
533
|
+
&mb->command_histograms[i].data_[0]);
|
534
|
+
}
|
535
|
+
int num_distance_codes =
|
536
|
+
kNumDistanceShortCodes + num_direct_distance_codes +
|
537
|
+
(48 << distance_postfix_bits);
|
538
|
+
for (int i = 0; i < mb->distance_histograms.size(); ++i) {
|
539
|
+
OptimizeHuffmanCountsForRle(num_distance_codes,
|
540
|
+
&mb->distance_histograms[i].data_[0]);
|
541
|
+
}
|
542
|
+
}
|
543
|
+
|
544
|
+
} // namespace brotli
|