extbrotli 0.0.1.PROTOTYPE
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +28 -0
- data/README.md +67 -0
- data/Rakefile +158 -0
- data/contrib/brotli/LICENSE +202 -0
- data/contrib/brotli/README.md +18 -0
- data/contrib/brotli/dec/bit_reader.c +55 -0
- data/contrib/brotli/dec/bit_reader.h +256 -0
- data/contrib/brotli/dec/context.h +260 -0
- data/contrib/brotli/dec/decode.c +1573 -0
- data/contrib/brotli/dec/decode.h +160 -0
- data/contrib/brotli/dec/dictionary.h +9494 -0
- data/contrib/brotli/dec/huffman.c +325 -0
- data/contrib/brotli/dec/huffman.h +77 -0
- data/contrib/brotli/dec/port.h +148 -0
- data/contrib/brotli/dec/prefix.h +756 -0
- data/contrib/brotli/dec/state.c +149 -0
- data/contrib/brotli/dec/state.h +185 -0
- data/contrib/brotli/dec/streams.c +99 -0
- data/contrib/brotli/dec/streams.h +100 -0
- data/contrib/brotli/dec/transform.h +315 -0
- data/contrib/brotli/dec/types.h +36 -0
- data/contrib/brotli/enc/backward_references.cc +769 -0
- data/contrib/brotli/enc/backward_references.h +50 -0
- data/contrib/brotli/enc/bit_cost.h +147 -0
- data/contrib/brotli/enc/block_splitter.cc +418 -0
- data/contrib/brotli/enc/block_splitter.h +78 -0
- data/contrib/brotli/enc/brotli_bit_stream.cc +884 -0
- data/contrib/brotli/enc/brotli_bit_stream.h +149 -0
- data/contrib/brotli/enc/cluster.h +290 -0
- data/contrib/brotli/enc/command.h +140 -0
- data/contrib/brotli/enc/context.h +185 -0
- data/contrib/brotli/enc/dictionary.h +9485 -0
- data/contrib/brotli/enc/dictionary_hash.h +4125 -0
- data/contrib/brotli/enc/encode.cc +715 -0
- data/contrib/brotli/enc/encode.h +196 -0
- data/contrib/brotli/enc/encode_parallel.cc +354 -0
- data/contrib/brotli/enc/encode_parallel.h +37 -0
- data/contrib/brotli/enc/entropy_encode.cc +492 -0
- data/contrib/brotli/enc/entropy_encode.h +88 -0
- data/contrib/brotli/enc/fast_log.h +179 -0
- data/contrib/brotli/enc/find_match_length.h +87 -0
- data/contrib/brotli/enc/hash.h +686 -0
- data/contrib/brotli/enc/histogram.cc +76 -0
- data/contrib/brotli/enc/histogram.h +100 -0
- data/contrib/brotli/enc/literal_cost.cc +172 -0
- data/contrib/brotli/enc/literal_cost.h +38 -0
- data/contrib/brotli/enc/metablock.cc +544 -0
- data/contrib/brotli/enc/metablock.h +88 -0
- data/contrib/brotli/enc/port.h +151 -0
- data/contrib/brotli/enc/prefix.h +85 -0
- data/contrib/brotli/enc/ringbuffer.h +108 -0
- data/contrib/brotli/enc/static_dict.cc +441 -0
- data/contrib/brotli/enc/static_dict.h +40 -0
- data/contrib/brotli/enc/static_dict_lut.h +12063 -0
- data/contrib/brotli/enc/streams.cc +127 -0
- data/contrib/brotli/enc/streams.h +129 -0
- data/contrib/brotli/enc/transform.h +250 -0
- data/contrib/brotli/enc/write_bits.h +91 -0
- data/ext/extbrotli.cc +24 -0
- data/ext/extbrotli.h +73 -0
- data/ext/extconf.rb +35 -0
- data/ext/lldecoder.c +220 -0
- data/ext/llencoder.cc +433 -0
- data/gemstub.rb +21 -0
- data/lib/extbrotli.rb +243 -0
- data/lib/extbrotli/version.rb +3 -0
- metadata +140 -0
@@ -0,0 +1,76 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Build per-context histograms of literals, commands and distance codes.
|
16
|
+
|
17
|
+
#include "./histogram.h"
|
18
|
+
|
19
|
+
#include <stdint.h>
|
20
|
+
#include <cmath>
|
21
|
+
|
22
|
+
#include "./block_splitter.h"
|
23
|
+
#include "./command.h"
|
24
|
+
#include "./context.h"
|
25
|
+
#include "./prefix.h"
|
26
|
+
|
27
|
+
namespace brotli {
|
28
|
+
|
29
|
+
void BuildHistograms(
|
30
|
+
const Command* cmds,
|
31
|
+
const size_t num_commands,
|
32
|
+
const BlockSplit& literal_split,
|
33
|
+
const BlockSplit& insert_and_copy_split,
|
34
|
+
const BlockSplit& dist_split,
|
35
|
+
const uint8_t* ringbuffer,
|
36
|
+
size_t start_pos,
|
37
|
+
size_t mask,
|
38
|
+
uint8_t prev_byte,
|
39
|
+
uint8_t prev_byte2,
|
40
|
+
const std::vector<int>& context_modes,
|
41
|
+
std::vector<HistogramLiteral>* literal_histograms,
|
42
|
+
std::vector<HistogramCommand>* insert_and_copy_histograms,
|
43
|
+
std::vector<HistogramDistance>* copy_dist_histograms) {
|
44
|
+
size_t pos = start_pos;
|
45
|
+
BlockSplitIterator literal_it(literal_split);
|
46
|
+
BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
|
47
|
+
BlockSplitIterator dist_it(dist_split);
|
48
|
+
for (int i = 0; i < num_commands; ++i) {
|
49
|
+
const Command &cmd = cmds[i];
|
50
|
+
insert_and_copy_it.Next();
|
51
|
+
(*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
|
52
|
+
cmd.cmd_prefix_);
|
53
|
+
for (int j = 0; j < cmd.insert_len_; ++j) {
|
54
|
+
literal_it.Next();
|
55
|
+
int context = (literal_it.type_ << kLiteralContextBits) +
|
56
|
+
Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
|
57
|
+
(*literal_histograms)[context].Add(ringbuffer[pos & mask]);
|
58
|
+
prev_byte2 = prev_byte;
|
59
|
+
prev_byte = ringbuffer[pos & mask];
|
60
|
+
++pos;
|
61
|
+
}
|
62
|
+
pos += cmd.copy_len_;
|
63
|
+
if (cmd.copy_len_ > 0) {
|
64
|
+
prev_byte2 = ringbuffer[(pos - 2) & mask];
|
65
|
+
prev_byte = ringbuffer[(pos - 1) & mask];
|
66
|
+
if (cmd.cmd_prefix_ >= 128) {
|
67
|
+
dist_it.Next();
|
68
|
+
int context = (dist_it.type_ << kDistanceContextBits) +
|
69
|
+
cmd.DistanceContext();
|
70
|
+
(*copy_dist_histograms)[context].Add(cmd.dist_prefix_);
|
71
|
+
}
|
72
|
+
}
|
73
|
+
}
|
74
|
+
}
|
75
|
+
|
76
|
+
} // namespace brotli
|
@@ -0,0 +1,100 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Models the histograms of literals, commands and distance codes.
|
16
|
+
|
17
|
+
#ifndef BROTLI_ENC_HISTOGRAM_H_
|
18
|
+
#define BROTLI_ENC_HISTOGRAM_H_
|
19
|
+
|
20
|
+
#include <stdint.h>
|
21
|
+
#include <string.h>
|
22
|
+
#include <vector>
|
23
|
+
#include <utility>
|
24
|
+
#include "./command.h"
|
25
|
+
#include "./fast_log.h"
|
26
|
+
#include "./prefix.h"
|
27
|
+
|
28
|
+
namespace brotli {
|
29
|
+
|
30
|
+
class BlockSplit;
|
31
|
+
|
32
|
+
// A simple container for histograms of data in blocks.
//
// data_[s] holds the number of occurrences of symbol s (0 <= s < kDataSize)
// and total_count_ the number of symbols added so far. bit_cost_ is never
// computed by this struct; it is storage for code that uses the histogram.
template<int kDataSize>
struct Histogram {
  // Creates an empty histogram. bit_cost_ gets a defined value (0.0) so that
  // copying a freshly constructed histogram never reads an indeterminate
  // double.
  Histogram() : total_count_(0), bit_cost_(0.0) {
    Clear();
  }
  // Resets every count and the total to zero. bit_cost_ is left untouched.
  void Clear() {
    memset(data_, 0, sizeof(data_));
    total_count_ = 0;
  }
  // Counts one occurrence of symbol `val`. No range check is performed.
  void Add(int val) {
    ++data_[val];
    ++total_count_;
  }
  // Undoes one Add(val). No range or underflow check is performed.
  void Remove(int val) {
    --data_[val];
    --total_count_;
  }
  // Counts the n symbols stored in p[0], ..., p[n - 1].
  template<typename DataType>
  void Add(const DataType *p, size_t n) {
    total_count_ += static_cast<int>(n);
    for (size_t i = 0; i < n; ++i) {
      ++data_[p[i]];
    }
  }
  // Adds all counts of `v` to this histogram.
  void AddHistogram(const Histogram& v) {
    total_count_ += v.total_count_;
    for (int i = 0; i < kDataSize; ++i) {
      data_[i] += v.data_[i];
    }
  }

  int data_[kDataSize];
  int total_count_;
  double bit_cost_;
};
|
67
|
+
|
68
|
+
// Literal histogram: 256 buckets, one per byte value.
|
69
|
+
typedef Histogram<256> HistogramLiteral;
|
70
|
+
// Histograms over command, distance and block-length prefix codes; each is
// sized by the matching kNum...Prefixes constant.
|
71
|
+
typedef Histogram<kNumCommandPrefixes> HistogramCommand;
|
72
|
+
typedef Histogram<kNumDistancePrefixes> HistogramDistance;
|
73
|
+
typedef Histogram<kNumBlockLenPrefixes> HistogramBlockLength;
|
74
|
+
// Context map histogram, 256 Huffman tree indexes + 16 run length codes.
|
75
|
+
typedef Histogram<272> HistogramContextMap;
|
76
|
+
// Block type histogram, 256 block types + 2 special symbols.
|
77
|
+
typedef Histogram<258> HistogramBlockType;
|
78
|
+
|
79
|
+
// Shift applied to a literal block type when forming a literal histogram
// index: index = (block type << kLiteralContextBits) + literal context.
static const int kLiteralContextBits = 6;
|
80
|
+
// Shift applied to a distance block type when forming a distance histogram
// index: index = (block type << kDistanceContextBits) + distance context.
static const int kDistanceContextBits = 2;
|
81
|
+
|
82
|
+
// Builds per-context histograms of literals, commands and distance codes for
// the `num_commands` commands in `cmds`.
//
// The three BlockSplit arguments select the block type of each literal,
// command and distance. `ringbuffer` / `mask` address the input bytes, `pos`
// is the position of the first byte covered by the commands, and `prev_byte` /
// `prev_byte2` are the context bytes in effect at `pos`. `context_modes` is
// indexed by literal block type. The three output vectors are indexed but not
// resized by the implementation, so they must be sized by the caller.
void BuildHistograms(
|
83
|
+
const Command* cmds,
|
84
|
+
const size_t num_commands,
|
85
|
+
const BlockSplit& literal_split,
|
86
|
+
const BlockSplit& insert_and_copy_split,
|
87
|
+
const BlockSplit& dist_split,
|
88
|
+
const uint8_t* ringbuffer,
|
89
|
+
size_t pos,
|
90
|
+
size_t mask,
|
91
|
+
uint8_t prev_byte,
|
92
|
+
uint8_t prev_byte2,
|
93
|
+
const std::vector<int>& context_modes,
|
94
|
+
std::vector<HistogramLiteral>* literal_histograms,
|
95
|
+
std::vector<HistogramCommand>* insert_and_copy_histograms,
|
96
|
+
std::vector<HistogramDistance>* copy_dist_histograms);
|
97
|
+
|
98
|
+
} // namespace brotli
|
99
|
+
|
100
|
+
#endif // BROTLI_ENC_HISTOGRAM_H_
|
@@ -0,0 +1,172 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Literal cost model to allow backward reference replacement to be efficient.
|
16
|
+
|
17
|
+
#include "./literal_cost.h"
|
18
|
+
|
19
|
+
#include <math.h>
|
20
|
+
#include <stdint.h>
|
21
|
+
#include <algorithm>
|
22
|
+
|
23
|
+
#include "./fast_log.h"
|
24
|
+
|
25
|
+
namespace brotli {
|
26
|
+
|
27
|
+
// Returns the UTF-8 sequence position expected for the byte that follows `c`,
// given that `last` is the byte that preceded `c`:
//   0 - a new sequence starts ('Byte 1'),
//   1 - 'Byte 2' of a multi-byte sequence,
//   2 - 'Byte 3' of a multi-byte sequence.
// Non-zero results are limited to at most `clamp`.
static int UTF8Position(int last, int c, int clamp) {
  const bool is_single_byte = c < 128;
  if (is_single_byte) {
    // Next one is the 'Byte 1' again.
    return 0;
  }

  const bool is_lead_byte = c >= 192;
  if (is_lead_byte) {
    // Next one is the 'Byte 2' of utf-8 encoding.
    return clamp < 1 ? clamp : 1;
  }

  // `c` is a continuation byte; the previous byte decides whether the
  // sequence ends here.
  if (last >= 0xe0) {
    // Next one is the 'Byte 3' of utf-8 encoding.
    return clamp < 2 ? clamp : 2;
  }
  // Completed two or three byte coding.
  return 0;
}
|
41
|
+
|
42
|
+
static int DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
|
43
|
+
const uint8_t *data) {
|
44
|
+
int counts[3] = { 0 };
|
45
|
+
int max_utf8 = 1; // should be 2, but 1 compresses better.
|
46
|
+
int last_c = 0;
|
47
|
+
int utf8_pos = 0;
|
48
|
+
for (int i = 0; i < len; ++i) {
|
49
|
+
int c = data[(pos + i) & mask];
|
50
|
+
utf8_pos = UTF8Position(last_c, c, 2);
|
51
|
+
++counts[utf8_pos];
|
52
|
+
last_c = c;
|
53
|
+
}
|
54
|
+
if (counts[2] < 500) {
|
55
|
+
max_utf8 = 1;
|
56
|
+
}
|
57
|
+
if (counts[1] + counts[2] < 25) {
|
58
|
+
max_utf8 = 0;
|
59
|
+
}
|
60
|
+
return max_utf8;
|
61
|
+
}
|
62
|
+
|
63
|
+
void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
64
|
+
size_t cost_mask, const uint8_t *data,
|
65
|
+
float *cost) {
|
66
|
+
|
67
|
+
// max_utf8 is 0 (normal ascii single byte modeling),
|
68
|
+
// 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling).
|
69
|
+
const int max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
|
70
|
+
int histogram[3][256] = { { 0 } };
|
71
|
+
int window_half = 495;
|
72
|
+
int in_window = std::min(static_cast<size_t>(window_half), len);
|
73
|
+
int in_window_utf8[3] = { 0 };
|
74
|
+
|
75
|
+
// Bootstrap histograms.
|
76
|
+
int last_c = 0;
|
77
|
+
int utf8_pos = 0;
|
78
|
+
for (int i = 0; i < in_window; ++i) {
|
79
|
+
int c = data[(pos + i) & mask];
|
80
|
+
++histogram[utf8_pos][c];
|
81
|
+
++in_window_utf8[utf8_pos];
|
82
|
+
utf8_pos = UTF8Position(last_c, c, max_utf8);
|
83
|
+
last_c = c;
|
84
|
+
}
|
85
|
+
|
86
|
+
// Compute bit costs with sliding window.
|
87
|
+
for (int i = 0; i < len; ++i) {
|
88
|
+
if (i - window_half >= 0) {
|
89
|
+
// Remove a byte in the past.
|
90
|
+
int c = (i - window_half - 1) < 0 ?
|
91
|
+
0 : data[(pos + i - window_half - 1) & mask];
|
92
|
+
int last_c = (i - window_half - 2) < 0 ?
|
93
|
+
0 : data[(pos + i - window_half - 2) & mask];
|
94
|
+
int utf8_pos2 = UTF8Position(last_c, c, max_utf8);
|
95
|
+
--histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
|
96
|
+
--in_window_utf8[utf8_pos2];
|
97
|
+
}
|
98
|
+
if (i + window_half < len) {
|
99
|
+
// Add a byte in the future.
|
100
|
+
int c = (i + window_half - 1) < 0 ?
|
101
|
+
0 : data[(pos + i + window_half - 1) & mask];
|
102
|
+
int last_c = (i + window_half - 2) < 0 ?
|
103
|
+
0 : data[(pos + i + window_half - 2) & mask];
|
104
|
+
int utf8_pos2 = UTF8Position(last_c, c, max_utf8);
|
105
|
+
++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
|
106
|
+
++in_window_utf8[utf8_pos2];
|
107
|
+
}
|
108
|
+
int c = i < 1 ? 0 : data[(pos + i - 1) & mask];
|
109
|
+
int last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
|
110
|
+
int utf8_pos = UTF8Position(last_c, c, max_utf8);
|
111
|
+
int masked_pos = (pos + i) & mask;
|
112
|
+
int histo = histogram[utf8_pos][data[masked_pos]];
|
113
|
+
if (histo == 0) {
|
114
|
+
histo = 1;
|
115
|
+
}
|
116
|
+
float lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
|
117
|
+
lit_cost += 0.02905;
|
118
|
+
if (lit_cost < 1.0) {
|
119
|
+
lit_cost *= 0.5;
|
120
|
+
lit_cost += 0.5;
|
121
|
+
}
|
122
|
+
// Make the first bytes more expensive -- seems to help, not sure why.
|
123
|
+
// Perhaps because the entropy source is changing its properties
|
124
|
+
// rapidly in the beginning of the file, perhaps because the beginning
|
125
|
+
// of the data is a statistical "anomaly".
|
126
|
+
if (i < 2000) {
|
127
|
+
lit_cost += 0.7 - ((2000 - i) / 2000.0 * 0.35);
|
128
|
+
}
|
129
|
+
cost[(pos + i) & cost_mask] = lit_cost;
|
130
|
+
}
|
131
|
+
}
|
132
|
+
|
133
|
+
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
134
|
+
size_t cost_mask, const uint8_t *data,
|
135
|
+
float *cost) {
|
136
|
+
int histogram[256] = { 0 };
|
137
|
+
int window_half = 2000;
|
138
|
+
int in_window = std::min(static_cast<size_t>(window_half), len);
|
139
|
+
|
140
|
+
// Bootstrap histogram.
|
141
|
+
for (int i = 0; i < in_window; ++i) {
|
142
|
+
++histogram[data[(pos + i) & mask]];
|
143
|
+
}
|
144
|
+
|
145
|
+
// Compute bit costs with sliding window.
|
146
|
+
for (int i = 0; i < len; ++i) {
|
147
|
+
if (i - window_half >= 0) {
|
148
|
+
// Remove a byte in the past.
|
149
|
+
--histogram[data[(pos + i - window_half) & mask]];
|
150
|
+
--in_window;
|
151
|
+
}
|
152
|
+
if (i + window_half < len) {
|
153
|
+
// Add a byte in the future.
|
154
|
+
++histogram[data[(pos + i + window_half) & mask]];
|
155
|
+
++in_window;
|
156
|
+
}
|
157
|
+
int histo = histogram[data[(pos + i) & mask]];
|
158
|
+
if (histo == 0) {
|
159
|
+
histo = 1;
|
160
|
+
}
|
161
|
+
float lit_cost = FastLog2(in_window) - FastLog2(histo);
|
162
|
+
lit_cost += 0.029;
|
163
|
+
if (lit_cost < 1.0) {
|
164
|
+
lit_cost *= 0.5;
|
165
|
+
lit_cost += 0.5;
|
166
|
+
}
|
167
|
+
cost[(pos + i) & cost_mask] = lit_cost;
|
168
|
+
}
|
169
|
+
}
|
170
|
+
|
171
|
+
|
172
|
+
} // namespace brotli
|
@@ -0,0 +1,38 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Literal cost model to allow backward reference replacement to be efficient.
|
16
|
+
|
17
|
+
#ifndef BROTLI_ENC_LITERAL_COST_H_
|
18
|
+
#define BROTLI_ENC_LITERAL_COST_H_
|
19
|
+
|
20
|
+
#include <stddef.h>
|
21
|
+
#include <stdint.h>
|
22
|
+
|
23
|
+
namespace brotli {
|
24
|
+
|
25
|
+
// Estimates how many bits the literals in the interval [pos, pos + len) in the
|
26
|
+
// ringbuffer (data, mask) will take entropy coded and writes these estimates
|
27
|
+
// to the ringbuffer (cost, cost_mask).
|
28
|
+
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
29
|
+
size_t cost_mask, const uint8_t *data,
|
30
|
+
float *cost);
|
31
|
+
|
32
|
+
// Same inputs and outputs as EstimateBitCostsForLiterals, but the statistics
// are kept separately for each position inside a UTF-8 byte sequence.
void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
33
|
+
size_t cost_mask, const uint8_t *data,
|
34
|
+
float *cost);
|
35
|
+
|
36
|
+
} // namespace brotli
|
37
|
+
|
38
|
+
#endif // BROTLI_ENC_LITERAL_COST_H_
|
@@ -0,0 +1,544 @@
|
|
1
|
+
// Copyright 2015 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Algorithms for distributing the literals and commands of a metablock between
|
16
|
+
// block types and contexts.
|
17
|
+
|
18
|
+
#include "./metablock.h"
|
19
|
+
|
20
|
+
#include "./block_splitter.h"
|
21
|
+
#include "./context.h"
|
22
|
+
#include "./cluster.h"
|
23
|
+
#include "./histogram.h"
|
24
|
+
|
25
|
+
namespace brotli {
|
26
|
+
|
27
|
+
void BuildMetaBlock(const uint8_t* ringbuffer,
|
28
|
+
const size_t pos,
|
29
|
+
const size_t mask,
|
30
|
+
uint8_t prev_byte,
|
31
|
+
uint8_t prev_byte2,
|
32
|
+
const Command* cmds,
|
33
|
+
size_t num_commands,
|
34
|
+
int literal_context_mode,
|
35
|
+
MetaBlockSplit* mb) {
|
36
|
+
SplitBlock(cmds, num_commands,
|
37
|
+
ringbuffer, pos, mask,
|
38
|
+
&mb->literal_split,
|
39
|
+
&mb->command_split,
|
40
|
+
&mb->distance_split);
|
41
|
+
|
42
|
+
std::vector<int> literal_context_modes(mb->literal_split.num_types,
|
43
|
+
literal_context_mode);
|
44
|
+
|
45
|
+
int num_literal_contexts =
|
46
|
+
mb->literal_split.num_types << kLiteralContextBits;
|
47
|
+
int num_distance_contexts =
|
48
|
+
mb->distance_split.num_types << kDistanceContextBits;
|
49
|
+
std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
|
50
|
+
mb->command_histograms.resize(mb->command_split.num_types);
|
51
|
+
std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
|
52
|
+
BuildHistograms(cmds, num_commands,
|
53
|
+
mb->literal_split,
|
54
|
+
mb->command_split,
|
55
|
+
mb->distance_split,
|
56
|
+
ringbuffer,
|
57
|
+
pos,
|
58
|
+
mask,
|
59
|
+
prev_byte,
|
60
|
+
prev_byte2,
|
61
|
+
literal_context_modes,
|
62
|
+
&literal_histograms,
|
63
|
+
&mb->command_histograms,
|
64
|
+
&distance_histograms);
|
65
|
+
|
66
|
+
// Histogram ids need to fit in one byte.
|
67
|
+
static const int kMaxNumberOfHistograms = 256;
|
68
|
+
|
69
|
+
mb->literal_histograms = literal_histograms;
|
70
|
+
ClusterHistograms(literal_histograms,
|
71
|
+
1 << kLiteralContextBits,
|
72
|
+
mb->literal_split.num_types,
|
73
|
+
kMaxNumberOfHistograms,
|
74
|
+
&mb->literal_histograms,
|
75
|
+
&mb->literal_context_map);
|
76
|
+
|
77
|
+
mb->distance_histograms = distance_histograms;
|
78
|
+
ClusterHistograms(distance_histograms,
|
79
|
+
1 << kDistanceContextBits,
|
80
|
+
mb->distance_split.num_types,
|
81
|
+
kMaxNumberOfHistograms,
|
82
|
+
&mb->distance_histograms,
|
83
|
+
&mb->distance_context_map);
|
84
|
+
}
|
85
|
+
|
86
|
+
// Greedy block splitter for one block category (literal, command or distance).
|
87
|
+
template<typename HistogramType>
|
88
|
+
class BlockSplitter {
|
89
|
+
public:
|
90
|
+
BlockSplitter(int alphabet_size,
|
91
|
+
int min_block_size,
|
92
|
+
double split_threshold,
|
93
|
+
int num_symbols,
|
94
|
+
BlockSplit* split,
|
95
|
+
std::vector<HistogramType>* histograms)
|
96
|
+
: alphabet_size_(alphabet_size),
|
97
|
+
min_block_size_(min_block_size),
|
98
|
+
split_threshold_(split_threshold),
|
99
|
+
num_blocks_(0),
|
100
|
+
split_(split),
|
101
|
+
histograms_(histograms),
|
102
|
+
target_block_size_(min_block_size),
|
103
|
+
block_size_(0),
|
104
|
+
curr_histogram_ix_(0),
|
105
|
+
merge_last_count_(0) {
|
106
|
+
int max_num_blocks = num_symbols / min_block_size + 1;
|
107
|
+
// We have to allocate one more histogram than the maximum number of block
|
108
|
+
// types for the current histogram when the meta-block is too big.
|
109
|
+
int max_num_types = std::min(max_num_blocks, kMaxBlockTypes + 1);
|
110
|
+
split_->lengths.resize(max_num_blocks);
|
111
|
+
split_->types.resize(max_num_blocks);
|
112
|
+
histograms_->resize(max_num_types);
|
113
|
+
last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
|
114
|
+
}
|
115
|
+
|
116
|
+
// Adds the next symbol to the current histogram. When the current histogram
|
117
|
+
// reaches the target size, decides on merging the block.
|
118
|
+
void AddSymbol(int symbol) {
|
119
|
+
(*histograms_)[curr_histogram_ix_].Add(symbol);
|
120
|
+
++block_size_;
|
121
|
+
if (block_size_ == target_block_size_) {
|
122
|
+
FinishBlock(/* is_final = */ false);
|
123
|
+
}
|
124
|
+
}
|
125
|
+
|
126
|
+
// Does either of three things:
|
127
|
+
// (1) emits the current block with a new block type;
|
128
|
+
// (2) emits the current block with the type of the second last block;
|
129
|
+
// (3) merges the current block with the last block.
|
130
|
+
void FinishBlock(bool is_final) {
|
131
|
+
if (block_size_ < min_block_size_) {
|
132
|
+
block_size_ = min_block_size_;
|
133
|
+
}
|
134
|
+
if (num_blocks_ == 0) {
|
135
|
+
// Create first block.
|
136
|
+
split_->lengths[0] = block_size_;
|
137
|
+
split_->types[0] = 0;
|
138
|
+
last_entropy_[0] =
|
139
|
+
BitsEntropy(&(*histograms_)[0].data_[0], alphabet_size_);
|
140
|
+
last_entropy_[1] = last_entropy_[0];
|
141
|
+
++num_blocks_;
|
142
|
+
++split_->num_types;
|
143
|
+
++curr_histogram_ix_;
|
144
|
+
block_size_ = 0;
|
145
|
+
} else if (block_size_ > 0) {
|
146
|
+
double entropy = BitsEntropy(&(*histograms_)[curr_histogram_ix_].data_[0],
|
147
|
+
alphabet_size_);
|
148
|
+
HistogramType combined_histo[2];
|
149
|
+
double combined_entropy[2];
|
150
|
+
double diff[2];
|
151
|
+
for (int j = 0; j < 2; ++j) {
|
152
|
+
int last_histogram_ix = last_histogram_ix_[j];
|
153
|
+
combined_histo[j] = (*histograms_)[curr_histogram_ix_];
|
154
|
+
combined_histo[j].AddHistogram((*histograms_)[last_histogram_ix]);
|
155
|
+
combined_entropy[j] = BitsEntropy(
|
156
|
+
&combined_histo[j].data_[0], alphabet_size_);
|
157
|
+
diff[j] = combined_entropy[j] - entropy - last_entropy_[j];
|
158
|
+
}
|
159
|
+
|
160
|
+
if (split_->num_types < kMaxBlockTypes &&
|
161
|
+
diff[0] > split_threshold_ &&
|
162
|
+
diff[1] > split_threshold_) {
|
163
|
+
// Create new block.
|
164
|
+
split_->lengths[num_blocks_] = block_size_;
|
165
|
+
split_->types[num_blocks_] = split_->num_types;
|
166
|
+
last_histogram_ix_[1] = last_histogram_ix_[0];
|
167
|
+
last_histogram_ix_[0] = split_->num_types;
|
168
|
+
last_entropy_[1] = last_entropy_[0];
|
169
|
+
last_entropy_[0] = entropy;
|
170
|
+
++num_blocks_;
|
171
|
+
++split_->num_types;
|
172
|
+
++curr_histogram_ix_;
|
173
|
+
block_size_ = 0;
|
174
|
+
merge_last_count_ = 0;
|
175
|
+
target_block_size_ = min_block_size_;
|
176
|
+
} else if (diff[1] < diff[0] - 20.0) {
|
177
|
+
// Combine this block with second last block.
|
178
|
+
split_->lengths[num_blocks_] = block_size_;
|
179
|
+
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
|
180
|
+
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
|
181
|
+
(*histograms_)[last_histogram_ix_[0]] = combined_histo[1];
|
182
|
+
last_entropy_[1] = last_entropy_[0];
|
183
|
+
last_entropy_[0] = combined_entropy[1];
|
184
|
+
++num_blocks_;
|
185
|
+
block_size_ = 0;
|
186
|
+
(*histograms_)[curr_histogram_ix_].Clear();
|
187
|
+
merge_last_count_ = 0;
|
188
|
+
target_block_size_ = min_block_size_;
|
189
|
+
} else {
|
190
|
+
// Combine this block with last block.
|
191
|
+
split_->lengths[num_blocks_ - 1] += block_size_;
|
192
|
+
(*histograms_)[last_histogram_ix_[0]] = combined_histo[0];
|
193
|
+
last_entropy_[0] = combined_entropy[0];
|
194
|
+
if (split_->num_types == 1) {
|
195
|
+
last_entropy_[1] = last_entropy_[0];
|
196
|
+
}
|
197
|
+
block_size_ = 0;
|
198
|
+
(*histograms_)[curr_histogram_ix_].Clear();
|
199
|
+
if (++merge_last_count_ > 1) {
|
200
|
+
target_block_size_ += min_block_size_;
|
201
|
+
}
|
202
|
+
}
|
203
|
+
}
|
204
|
+
if (is_final) {
|
205
|
+
(*histograms_).resize(split_->num_types);
|
206
|
+
split_->types.resize(num_blocks_);
|
207
|
+
split_->lengths.resize(num_blocks_);
|
208
|
+
}
|
209
|
+
}
|
210
|
+
|
211
|
+
private:
|
212
|
+
static const int kMaxBlockTypes = 256;
|
213
|
+
|
214
|
+
// Alphabet size of particular block category.
|
215
|
+
const int alphabet_size_;
|
216
|
+
// We collect at least this many symbols for each block.
|
217
|
+
const int min_block_size_;
|
218
|
+
// We merge histograms A and B if
|
219
|
+
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
|
220
|
+
// where A is the current histogram and B is the histogram of the last or the
|
221
|
+
// second last block type.
|
222
|
+
const double split_threshold_;
|
223
|
+
|
224
|
+
int num_blocks_;
|
225
|
+
BlockSplit* split_; // not owned
|
226
|
+
std::vector<HistogramType>* histograms_; // not owned
|
227
|
+
|
228
|
+
// The number of symbols that we want to collect before deciding on whether
|
229
|
+
// or not to merge the block with a previous one or emit a new block.
|
230
|
+
int target_block_size_;
|
231
|
+
// The number of symbols in the current histogram.
|
232
|
+
int block_size_;
|
233
|
+
// Offset of the current histogram.
|
234
|
+
int curr_histogram_ix_;
|
235
|
+
// Offset of the histograms of the previous two block types.
|
236
|
+
int last_histogram_ix_[2];
|
237
|
+
// Entropy of the previous two block types.
|
238
|
+
double last_entropy_[2];
|
239
|
+
// The number of times we merged the current block with the last one.
|
240
|
+
int merge_last_count_;
|
241
|
+
};
|
242
|
+
|
243
|
+
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
|
244
|
+
size_t pos,
|
245
|
+
size_t mask,
|
246
|
+
const Command *commands,
|
247
|
+
size_t n_commands,
|
248
|
+
MetaBlockSplit* mb) {
|
249
|
+
int num_literals = 0;
|
250
|
+
for (int i = 0; i < n_commands; ++i) {
|
251
|
+
num_literals += commands[i].insert_len_;
|
252
|
+
}
|
253
|
+
|
254
|
+
BlockSplitter<HistogramLiteral> lit_blocks(
|
255
|
+
256, 512, 400.0, num_literals,
|
256
|
+
&mb->literal_split, &mb->literal_histograms);
|
257
|
+
BlockSplitter<HistogramCommand> cmd_blocks(
|
258
|
+
kNumCommandPrefixes, 1024, 500.0, n_commands,
|
259
|
+
&mb->command_split, &mb->command_histograms);
|
260
|
+
BlockSplitter<HistogramDistance> dist_blocks(
|
261
|
+
64, 512, 100.0, n_commands,
|
262
|
+
&mb->distance_split, &mb->distance_histograms);
|
263
|
+
|
264
|
+
for (int i = 0; i < n_commands; ++i) {
|
265
|
+
const Command cmd = commands[i];
|
266
|
+
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
|
267
|
+
for (int j = 0; j < cmd.insert_len_; ++j) {
|
268
|
+
lit_blocks.AddSymbol(ringbuffer[pos & mask]);
|
269
|
+
++pos;
|
270
|
+
}
|
271
|
+
pos += cmd.copy_len_;
|
272
|
+
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
|
273
|
+
dist_blocks.AddSymbol(cmd.dist_prefix_);
|
274
|
+
}
|
275
|
+
}
|
276
|
+
|
277
|
+
lit_blocks.FinishBlock(/* is_final = */ true);
|
278
|
+
cmd_blocks.FinishBlock(/* is_final = */ true);
|
279
|
+
dist_blocks.FinishBlock(/* is_final = */ true);
|
280
|
+
}
|
281
|
+
|
282
|
+
// Greedy block splitter for one block category (literal, command or distance).
|
283
|
+
// Gathers histograms for all context buckets.
|
284
|
+
template<typename HistogramType>
|
285
|
+
class ContextBlockSplitter {
|
286
|
+
public:
|
287
|
+
ContextBlockSplitter(int alphabet_size,
|
288
|
+
int num_contexts,
|
289
|
+
int min_block_size,
|
290
|
+
double split_threshold,
|
291
|
+
int num_symbols,
|
292
|
+
BlockSplit* split,
|
293
|
+
std::vector<HistogramType>* histograms)
|
294
|
+
: alphabet_size_(alphabet_size),
|
295
|
+
num_contexts_(num_contexts),
|
296
|
+
max_block_types_(kMaxBlockTypes / num_contexts),
|
297
|
+
min_block_size_(min_block_size),
|
298
|
+
split_threshold_(split_threshold),
|
299
|
+
num_blocks_(0),
|
300
|
+
split_(split),
|
301
|
+
histograms_(histograms),
|
302
|
+
target_block_size_(min_block_size),
|
303
|
+
block_size_(0),
|
304
|
+
curr_histogram_ix_(0),
|
305
|
+
last_entropy_(2 * num_contexts),
|
306
|
+
merge_last_count_(0) {
|
307
|
+
int max_num_blocks = num_symbols / min_block_size + 1;
|
308
|
+
// We have to allocate one more histogram than the maximum number of block
|
309
|
+
// types for the current histogram when the meta-block is too big.
|
310
|
+
int max_num_types = std::min(max_num_blocks, max_block_types_ + 1);
|
311
|
+
split_->lengths.resize(max_num_blocks);
|
312
|
+
split_->types.resize(max_num_blocks);
|
313
|
+
histograms_->resize(max_num_types * num_contexts);
|
314
|
+
last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
|
315
|
+
}
|
316
|
+
|
317
|
+
// Adds the next symbol to the current block type and context. When the
|
318
|
+
// current block reaches the target size, decides on merging the block.
|
319
|
+
void AddSymbol(int symbol, int context) {
|
320
|
+
(*histograms_)[curr_histogram_ix_ + context].Add(symbol);
|
321
|
+
++block_size_;
|
322
|
+
if (block_size_ == target_block_size_) {
|
323
|
+
FinishBlock(/* is_final = */ false);
|
324
|
+
}
|
325
|
+
}
|
326
|
+
|
327
|
+
// Does either of three things:
|
328
|
+
// (1) emits the current block with a new block type;
|
329
|
+
// (2) emits the current block with the type of the second last block;
|
330
|
+
// (3) merges the current block with the last block.
|
331
|
+
void FinishBlock(bool is_final) {
|
332
|
+
if (block_size_ < min_block_size_) {
|
333
|
+
block_size_ = min_block_size_;
|
334
|
+
}
|
335
|
+
if (num_blocks_ == 0) {
|
336
|
+
// Create first block.
|
337
|
+
split_->lengths[0] = block_size_;
|
338
|
+
split_->types[0] = 0;
|
339
|
+
for (int i = 0; i < num_contexts_; ++i) {
|
340
|
+
last_entropy_[i] =
|
341
|
+
BitsEntropy(&(*histograms_)[i].data_[0], alphabet_size_);
|
342
|
+
last_entropy_[num_contexts_ + i] = last_entropy_[i];
|
343
|
+
}
|
344
|
+
++num_blocks_;
|
345
|
+
++split_->num_types;
|
346
|
+
curr_histogram_ix_ += num_contexts_;
|
347
|
+
block_size_ = 0;
|
348
|
+
} else if (block_size_ > 0) {
|
349
|
+
// Try merging the set of histograms for the current block type with the
|
350
|
+
// respective set of histograms for the last and second last block types.
|
351
|
+
// Decide over the split based on the total reduction of entropy across
|
352
|
+
// all contexts.
|
353
|
+
std::vector<double> entropy(num_contexts_);
|
354
|
+
std::vector<HistogramType> combined_histo(2 * num_contexts_);
|
355
|
+
std::vector<double> combined_entropy(2 * num_contexts_);
|
356
|
+
double diff[2] = { 0.0 };
|
357
|
+
for (int i = 0; i < num_contexts_; ++i) {
|
358
|
+
int curr_histo_ix = curr_histogram_ix_ + i;
|
359
|
+
entropy[i] = BitsEntropy(&(*histograms_)[curr_histo_ix].data_[0],
|
360
|
+
alphabet_size_);
|
361
|
+
for (int j = 0; j < 2; ++j) {
|
362
|
+
int jx = j * num_contexts_ + i;
|
363
|
+
int last_histogram_ix = last_histogram_ix_[j] + i;
|
364
|
+
combined_histo[jx] = (*histograms_)[curr_histo_ix];
|
365
|
+
combined_histo[jx].AddHistogram((*histograms_)[last_histogram_ix]);
|
366
|
+
combined_entropy[jx] = BitsEntropy(
|
367
|
+
&combined_histo[jx].data_[0], alphabet_size_);
|
368
|
+
diff[j] += combined_entropy[jx] - entropy[i] - last_entropy_[jx];
|
369
|
+
}
|
370
|
+
}
|
371
|
+
|
372
|
+
if (split_->num_types < max_block_types_ &&
|
373
|
+
diff[0] > split_threshold_ &&
|
374
|
+
diff[1] > split_threshold_) {
|
375
|
+
// Create new block.
|
376
|
+
split_->lengths[num_blocks_] = block_size_;
|
377
|
+
split_->types[num_blocks_] = split_->num_types;
|
378
|
+
last_histogram_ix_[1] = last_histogram_ix_[0];
|
379
|
+
last_histogram_ix_[0] = split_->num_types * num_contexts_;
|
380
|
+
for (int i = 0; i < num_contexts_; ++i) {
|
381
|
+
last_entropy_[num_contexts_ + i] = last_entropy_[i];
|
382
|
+
last_entropy_[i] = entropy[i];
|
383
|
+
}
|
384
|
+
++num_blocks_;
|
385
|
+
++split_->num_types;
|
386
|
+
curr_histogram_ix_ += num_contexts_;
|
387
|
+
block_size_ = 0;
|
388
|
+
merge_last_count_ = 0;
|
389
|
+
target_block_size_ = min_block_size_;
|
390
|
+
} else if (diff[1] < diff[0] - 20.0) {
|
391
|
+
// Combine this block with second last block.
|
392
|
+
split_->lengths[num_blocks_] = block_size_;
|
393
|
+
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
|
394
|
+
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
|
395
|
+
for (int i = 0; i < num_contexts_; ++i) {
|
396
|
+
(*histograms_)[last_histogram_ix_[0] + i] =
|
397
|
+
combined_histo[num_contexts_ + i];
|
398
|
+
last_entropy_[num_contexts_ + i] = last_entropy_[i];
|
399
|
+
last_entropy_[i] = combined_entropy[num_contexts_ + i];
|
400
|
+
(*histograms_)[curr_histogram_ix_ + i].Clear();
|
401
|
+
}
|
402
|
+
++num_blocks_;
|
403
|
+
block_size_ = 0;
|
404
|
+
merge_last_count_ = 0;
|
405
|
+
target_block_size_ = min_block_size_;
|
406
|
+
} else {
|
407
|
+
// Combine this block with last block.
|
408
|
+
split_->lengths[num_blocks_ - 1] += block_size_;
|
409
|
+
for (int i = 0; i < num_contexts_; ++i) {
|
410
|
+
(*histograms_)[last_histogram_ix_[0] + i] = combined_histo[i];
|
411
|
+
last_entropy_[i] = combined_entropy[i];
|
412
|
+
if (split_->num_types == 1) {
|
413
|
+
last_entropy_[num_contexts_ + i] = last_entropy_[i];
|
414
|
+
}
|
415
|
+
(*histograms_)[curr_histogram_ix_ + i].Clear();
|
416
|
+
}
|
417
|
+
block_size_ = 0;
|
418
|
+
if (++merge_last_count_ > 1) {
|
419
|
+
target_block_size_ += min_block_size_;
|
420
|
+
}
|
421
|
+
}
|
422
|
+
}
|
423
|
+
if (is_final) {
|
424
|
+
(*histograms_).resize(split_->num_types * num_contexts_);
|
425
|
+
split_->types.resize(num_blocks_);
|
426
|
+
split_->lengths.resize(num_blocks_);
|
427
|
+
}
|
428
|
+
}
|
429
|
+
|
430
|
+
private:
|
431
|
+
static const int kMaxBlockTypes = 256;
|
432
|
+
|
433
|
+
// Alphabet size of particular block category.
|
434
|
+
const int alphabet_size_;
|
435
|
+
const int num_contexts_;
|
436
|
+
const int max_block_types_;
|
437
|
+
// We collect at least this many symbols for each block.
|
438
|
+
const int min_block_size_;
|
439
|
+
// We merge histograms A and B if
|
440
|
+
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
|
441
|
+
// where A is the current histogram and B is the histogram of the last or the
|
442
|
+
// second last block type.
|
443
|
+
const double split_threshold_;
|
444
|
+
|
445
|
+
int num_blocks_;
|
446
|
+
BlockSplit* split_; // not owned
|
447
|
+
std::vector<HistogramType>* histograms_; // not owned
|
448
|
+
|
449
|
+
// The number of symbols that we want to collect before deciding on whether
|
450
|
+
// or not to merge the block with a previous one or emit a new block.
|
451
|
+
int target_block_size_;
|
452
|
+
// The number of symbols in the current histogram.
|
453
|
+
int block_size_;
|
454
|
+
// Offset of the current histogram.
|
455
|
+
int curr_histogram_ix_;
|
456
|
+
// Offset of the histograms of the previous two block types.
|
457
|
+
int last_histogram_ix_[2];
|
458
|
+
// Entropy of the previous two block types.
|
459
|
+
std::vector<double> last_entropy_;
|
460
|
+
// The number of times we merged the current block with the last one.
|
461
|
+
int merge_last_count_;
|
462
|
+
};
|
463
|
+
|
464
|
+
void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
|
465
|
+
size_t pos,
|
466
|
+
size_t mask,
|
467
|
+
uint8_t prev_byte,
|
468
|
+
uint8_t prev_byte2,
|
469
|
+
int literal_context_mode,
|
470
|
+
int num_contexts,
|
471
|
+
const int* static_context_map,
|
472
|
+
const Command *commands,
|
473
|
+
size_t n_commands,
|
474
|
+
MetaBlockSplit* mb) {
|
475
|
+
int num_literals = 0;
|
476
|
+
for (int i = 0; i < n_commands; ++i) {
|
477
|
+
num_literals += commands[i].insert_len_;
|
478
|
+
}
|
479
|
+
|
480
|
+
ContextBlockSplitter<HistogramLiteral> lit_blocks(
|
481
|
+
256, num_contexts, 512, 400.0, num_literals,
|
482
|
+
&mb->literal_split, &mb->literal_histograms);
|
483
|
+
BlockSplitter<HistogramCommand> cmd_blocks(
|
484
|
+
kNumCommandPrefixes, 1024, 500.0, n_commands,
|
485
|
+
&mb->command_split, &mb->command_histograms);
|
486
|
+
BlockSplitter<HistogramDistance> dist_blocks(
|
487
|
+
64, 512, 100.0, n_commands,
|
488
|
+
&mb->distance_split, &mb->distance_histograms);
|
489
|
+
|
490
|
+
for (int i = 0; i < n_commands; ++i) {
|
491
|
+
const Command cmd = commands[i];
|
492
|
+
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
|
493
|
+
for (int j = 0; j < cmd.insert_len_; ++j) {
|
494
|
+
int context = Context(prev_byte, prev_byte2, literal_context_mode);
|
495
|
+
uint8_t literal = ringbuffer[pos & mask];
|
496
|
+
lit_blocks.AddSymbol(literal, static_context_map[context]);
|
497
|
+
prev_byte2 = prev_byte;
|
498
|
+
prev_byte = literal;
|
499
|
+
++pos;
|
500
|
+
}
|
501
|
+
pos += cmd.copy_len_;
|
502
|
+
if (cmd.copy_len_ > 0) {
|
503
|
+
prev_byte2 = ringbuffer[(pos - 2) & mask];
|
504
|
+
prev_byte = ringbuffer[(pos - 1) & mask];
|
505
|
+
if (cmd.cmd_prefix_ >= 128) {
|
506
|
+
dist_blocks.AddSymbol(cmd.dist_prefix_);
|
507
|
+
}
|
508
|
+
}
|
509
|
+
}
|
510
|
+
|
511
|
+
lit_blocks.FinishBlock(/* is_final = */ true);
|
512
|
+
cmd_blocks.FinishBlock(/* is_final = */ true);
|
513
|
+
dist_blocks.FinishBlock(/* is_final = */ true);
|
514
|
+
|
515
|
+
mb->literal_context_map.resize(
|
516
|
+
mb->literal_split.num_types << kLiteralContextBits);
|
517
|
+
for (int i = 0; i < mb->literal_split.num_types; ++i) {
|
518
|
+
for (int j = 0; j < (1 << kLiteralContextBits); ++j) {
|
519
|
+
mb->literal_context_map[(i << kLiteralContextBits) + j] =
|
520
|
+
i * num_contexts + static_context_map[j];
|
521
|
+
}
|
522
|
+
}
|
523
|
+
}
|
524
|
+
|
525
|
+
void OptimizeHistograms(int num_direct_distance_codes,
|
526
|
+
int distance_postfix_bits,
|
527
|
+
MetaBlockSplit* mb) {
|
528
|
+
for (int i = 0; i < mb->literal_histograms.size(); ++i) {
|
529
|
+
OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0]);
|
530
|
+
}
|
531
|
+
for (int i = 0; i < mb->command_histograms.size(); ++i) {
|
532
|
+
OptimizeHuffmanCountsForRle(kNumCommandPrefixes,
|
533
|
+
&mb->command_histograms[i].data_[0]);
|
534
|
+
}
|
535
|
+
int num_distance_codes =
|
536
|
+
kNumDistanceShortCodes + num_direct_distance_codes +
|
537
|
+
(48 << distance_postfix_bits);
|
538
|
+
for (int i = 0; i < mb->distance_histograms.size(); ++i) {
|
539
|
+
OptimizeHuffmanCountsForRle(num_distance_codes,
|
540
|
+
&mb->distance_histograms[i].data_[0]);
|
541
|
+
}
|
542
|
+
}
|
543
|
+
|
544
|
+
} // namespace brotli
|