brotli 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.gitmodules +3 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/README.md +36 -0
- data/Rakefile +13 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/brotli.gemspec +28 -0
- data/ext/brotli/brotli.cc +67 -0
- data/ext/brotli/brotli.h +9 -0
- data/ext/brotli/extconf.rb +34 -0
- data/lib/brotli.rb +2 -0
- data/lib/brotli/version.rb +3 -0
- data/vendor/brotli/LICENSE +202 -0
- data/vendor/brotli/dec/Makefile +12 -0
- data/vendor/brotli/dec/bit_reader.c +55 -0
- data/vendor/brotli/dec/bit_reader.h +256 -0
- data/vendor/brotli/dec/context.h +260 -0
- data/vendor/brotli/dec/decode.c +1573 -0
- data/vendor/brotli/dec/decode.h +160 -0
- data/vendor/brotli/dec/dictionary.h +9494 -0
- data/vendor/brotli/dec/huffman.c +325 -0
- data/vendor/brotli/dec/huffman.h +77 -0
- data/vendor/brotli/dec/port.h +148 -0
- data/vendor/brotli/dec/prefix.h +756 -0
- data/vendor/brotli/dec/state.c +149 -0
- data/vendor/brotli/dec/state.h +185 -0
- data/vendor/brotli/dec/streams.c +99 -0
- data/vendor/brotli/dec/streams.h +100 -0
- data/vendor/brotli/dec/transform.h +315 -0
- data/vendor/brotli/dec/types.h +36 -0
- data/vendor/brotli/enc/Makefile +11 -0
- data/vendor/brotli/enc/backward_references.cc +769 -0
- data/vendor/brotli/enc/backward_references.h +50 -0
- data/vendor/brotli/enc/bit_cost.h +147 -0
- data/vendor/brotli/enc/block_splitter.cc +418 -0
- data/vendor/brotli/enc/block_splitter.h +78 -0
- data/vendor/brotli/enc/brotli_bit_stream.cc +884 -0
- data/vendor/brotli/enc/brotli_bit_stream.h +149 -0
- data/vendor/brotli/enc/cluster.h +290 -0
- data/vendor/brotli/enc/command.h +140 -0
- data/vendor/brotli/enc/context.h +185 -0
- data/vendor/brotli/enc/dictionary.h +9485 -0
- data/vendor/brotli/enc/dictionary_hash.h +4125 -0
- data/vendor/brotli/enc/encode.cc +715 -0
- data/vendor/brotli/enc/encode.h +196 -0
- data/vendor/brotli/enc/encode_parallel.cc +354 -0
- data/vendor/brotli/enc/encode_parallel.h +37 -0
- data/vendor/brotli/enc/entropy_encode.cc +492 -0
- data/vendor/brotli/enc/entropy_encode.h +88 -0
- data/vendor/brotli/enc/fast_log.h +179 -0
- data/vendor/brotli/enc/find_match_length.h +87 -0
- data/vendor/brotli/enc/hash.h +686 -0
- data/vendor/brotli/enc/histogram.cc +76 -0
- data/vendor/brotli/enc/histogram.h +100 -0
- data/vendor/brotli/enc/literal_cost.cc +172 -0
- data/vendor/brotli/enc/literal_cost.h +38 -0
- data/vendor/brotli/enc/metablock.cc +544 -0
- data/vendor/brotli/enc/metablock.h +88 -0
- data/vendor/brotli/enc/port.h +151 -0
- data/vendor/brotli/enc/prefix.h +85 -0
- data/vendor/brotli/enc/ringbuffer.h +108 -0
- data/vendor/brotli/enc/static_dict.cc +441 -0
- data/vendor/brotli/enc/static_dict.h +40 -0
- data/vendor/brotli/enc/static_dict_lut.h +12063 -0
- data/vendor/brotli/enc/streams.cc +127 -0
- data/vendor/brotli/enc/streams.h +129 -0
- data/vendor/brotli/enc/transform.h +250 -0
- data/vendor/brotli/enc/write_bits.h +91 -0
- metadata +171 -0
@@ -0,0 +1,149 @@
|
|
1
|
+
// Copyright 2014 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Functions to convert brotli-related data structures into the
|
16
|
+
// brotli bit stream. The functions here operate under the
|
17
|
+
// assumption that there is enough space in the storage, i.e., there are
|
18
|
+
// no out-of-range checks anywhere.
|
19
|
+
//
|
20
|
+
// These functions do bit addressing into a byte array. The byte array
|
21
|
+
// is called "storage" and the index to the bit is called storage_ix
|
22
|
+
// in function arguments.
|
23
|
+
|
24
|
+
#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
|
25
|
+
#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
|
26
|
+
|
27
|
+
#include <stddef.h>
|
28
|
+
#include <stdint.h>
|
29
|
+
#include <vector>
|
30
|
+
|
31
|
+
#include "./metablock.h"
|
32
|
+
|
33
|
+
namespace brotli {
|
34
|
+
|
35
|
+
// All Store functions here will use a storage_ix, which is always the bit
|
36
|
+
// position for the current storage.
|
37
|
+
|
38
|
+
// Stores a number between 0 and 255.
|
39
|
+
void StoreVarLenUint8(int n, int* storage_ix, uint8_t* storage);
|
40
|
+
|
41
|
+
// Stores the compressed meta-block header.
|
42
|
+
bool StoreCompressedMetaBlockHeader(bool final_block,
|
43
|
+
size_t length,
|
44
|
+
int* storage_ix,
|
45
|
+
uint8_t* storage);
|
46
|
+
|
47
|
+
// Stores the uncompressed meta-block header.
|
48
|
+
bool StoreUncompressedMetaBlockHeader(size_t length,
|
49
|
+
int* storage_ix,
|
50
|
+
uint8_t* storage);
|
51
|
+
|
52
|
+
// Stores a context map where the histogram type is always the block type.
|
53
|
+
void StoreTrivialContextMap(int num_types,
|
54
|
+
int context_bits,
|
55
|
+
int* storage_ix,
|
56
|
+
uint8_t* storage);
|
57
|
+
|
58
|
+
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
|
59
|
+
const int num_codes,
|
60
|
+
const uint8_t *code_length_bitdepth,
|
61
|
+
int *storage_ix,
|
62
|
+
uint8_t *storage);
|
63
|
+
|
64
|
+
// Builds a Huffman tree from histogram[0:length] into depth[0:length] and
|
65
|
+
// bits[0:length] and stores the encoded tree to the bit stream.
|
66
|
+
void BuildAndStoreHuffmanTree(const int *histogram,
|
67
|
+
const int length,
|
68
|
+
uint8_t* depth,
|
69
|
+
uint16_t* bits,
|
70
|
+
int* storage_ix,
|
71
|
+
uint8_t* storage);
|
72
|
+
|
73
|
+
// Encodes the given context map to the bit stream. The number of different
|
74
|
+
// histogram ids is given by num_clusters.
|
75
|
+
void EncodeContextMap(const std::vector<int>& context_map,
|
76
|
+
int num_clusters,
|
77
|
+
int* storage_ix, uint8_t* storage);
|
78
|
+
|
79
|
+
// Data structure that stores everything that is needed to encode each block
|
80
|
+
// switch command.
|
81
|
+
// Filled in by BuildAndStoreBlockSplitCode (which takes it as an output
// pointer) and read by StoreBlockSwitch (which takes it by const reference);
// see their declarations in this header.
// NOTE(review): the per-field roles noted below are inferred from the field
// names and from the uint8_t depth / uint16_t bits parameter types of
// BuildAndStoreHuffmanTree -- confirm against brotli_bit_stream.cc.
struct BlockSplitCode {
|
82
|
+
// Presumably one entry per block switch: the block type symbol.
std::vector<int> type_code;
|
83
|
+
// Presumably one entry per block switch: the block length prefix symbol,
// followed by its extra-bit count and extra-bit value.
std::vector<int> length_prefix;
|
84
|
+
std::vector<int> length_nextra;
|
85
|
+
std::vector<int> length_extra;
|
86
|
+
// Presumably the prefix code (depths and bit patterns) for block types.
std::vector<uint8_t> type_depths;
|
87
|
+
std::vector<uint16_t> type_bits;
|
88
|
+
// Presumably the prefix code (depths and bit patterns) for block lengths.
std::vector<uint8_t> length_depths;
|
89
|
+
std::vector<uint16_t> length_bits;
|
90
|
+
};
|
91
|
+
|
92
|
+
// Builds a BlockSplitCode data structure from the block split given by the
|
93
|
+
// vector of block types and block lengths and stores it to the bit stream.
|
94
|
+
void BuildAndStoreBlockSplitCode(const std::vector<int>& types,
|
95
|
+
const std::vector<int>& lengths,
|
96
|
+
const int num_types,
|
97
|
+
BlockSplitCode* code,
|
98
|
+
int* storage_ix,
|
99
|
+
uint8_t* storage);
|
100
|
+
|
101
|
+
// Stores the block switch command with index block_ix to the bit stream.
|
102
|
+
void StoreBlockSwitch(const BlockSplitCode& code,
|
103
|
+
const int block_ix,
|
104
|
+
int* storage_ix,
|
105
|
+
uint8_t* storage);
|
106
|
+
|
107
|
+
bool StoreMetaBlock(const uint8_t* input,
|
108
|
+
size_t start_pos,
|
109
|
+
size_t length,
|
110
|
+
size_t mask,
|
111
|
+
uint8_t prev_byte,
|
112
|
+
uint8_t prev_byte2,
|
113
|
+
bool final_block,
|
114
|
+
int num_direct_distance_codes,
|
115
|
+
int distance_postfix_bits,
|
116
|
+
int literal_context_mode,
|
117
|
+
const brotli::Command *commands,
|
118
|
+
size_t n_commands,
|
119
|
+
const MetaBlockSplit& mb,
|
120
|
+
int *storage_ix,
|
121
|
+
uint8_t *storage);
|
122
|
+
|
123
|
+
// Stores the meta-block without doing any block splitting, just collects
|
124
|
+
// one histogram per block category and uses that for entropy coding.
|
125
|
+
bool StoreMetaBlockTrivial(const uint8_t* input,
|
126
|
+
size_t start_pos,
|
127
|
+
size_t length,
|
128
|
+
size_t mask,
|
129
|
+
bool is_last,
|
130
|
+
const brotli::Command *commands,
|
131
|
+
size_t n_commands,
|
132
|
+
int *storage_ix,
|
133
|
+
uint8_t *storage);
|
134
|
+
|
135
|
+
// This is for storing uncompressed blocks (simple raw storage of
|
136
|
+
// bytes-as-bytes).
|
137
|
+
bool StoreUncompressedMetaBlock(bool final_block,
|
138
|
+
const uint8_t* input,
|
139
|
+
size_t position, size_t mask,
|
140
|
+
size_t len,
|
141
|
+
int* storage_ix,
|
142
|
+
uint8_t* storage);
|
143
|
+
|
144
|
+
// Stores an empty metadata meta-block and syncs to a byte boundary.
|
145
|
+
void StoreSyncMetaBlock(int* storage_ix, uint8_t* storage);
|
146
|
+
|
147
|
+
} // namespace brotli
|
148
|
+
|
149
|
+
#endif // BROTLI_ENC_BROTLI_BIT_STREAM_H_
|
@@ -0,0 +1,290 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Functions for clustering similar histograms together.
|
16
|
+
|
17
|
+
#ifndef BROTLI_ENC_CLUSTER_H_
|
18
|
+
#define BROTLI_ENC_CLUSTER_H_
|
19
|
+
|
20
|
+
#include <math.h>
|
21
|
+
#include <stdint.h>
|
22
|
+
#include <stdio.h>
|
23
|
+
#include <algorithm>
|
24
|
+
#include <complex>
|
25
|
+
#include <map>
|
26
|
+
#include <set>
|
27
|
+
#include <utility>
|
28
|
+
#include <vector>
|
29
|
+
|
30
|
+
#include "./bit_cost.h"
|
31
|
+
#include "./entropy_encode.h"
|
32
|
+
#include "./fast_log.h"
|
33
|
+
#include "./histogram.h"
|
34
|
+
|
35
|
+
namespace brotli {
|
36
|
+
|
37
|
+
// A candidate merge of two histogram clusters, as stored in the pairs heap.
struct HistogramPair {
  int idx1;           // index of the first histogram of the pair
  int idx2;           // index of the second histogram of the pair
  bool valid;         // set to false once the pair can no longer be merged
  double cost_combo;  // bit cost of the two histograms combined
  double cost_diff;   // bit cost change caused by merging the pair
};

// Ordering used with std::push_heap / std::pop_heap on HistogramPair.
// A pair compares "less" when its cost_diff is larger, so the pair with the
// smallest cost_diff ends up at the front of the heap. Ties on cost_diff are
// broken by index distance: the pair whose indices are closer wins.
struct HistogramPairComparator {
  bool operator()(const HistogramPair& p1, const HistogramPair& p2) const {
    if (p1.cost_diff == p2.cost_diff) {
      const int span1 = abs(p1.idx1 - p1.idx2);
      const int span2 = abs(p2.idx1 - p2.idx2);
      return span1 > span2;
    }
    return p1.cost_diff > p2.cost_diff;
  }
};
|
53
|
+
|
54
|
+
// Returns entropy reduction of the context map when we combine two clusters.
|
55
|
+
inline double ClusterCostDiff(int size_a, int size_b) {
|
56
|
+
int size_c = size_a + size_b;
|
57
|
+
return size_a * FastLog2(size_a) + size_b * FastLog2(size_b) -
|
58
|
+
size_c * FastLog2(size_c);
|
59
|
+
}
|
60
|
+
|
61
|
+
// Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
|
62
|
+
// it is below a threshold, stores the pair (idx1, idx2) in the *pairs heap.
|
63
|
+
template<typename HistogramType>
|
64
|
+
void CompareAndPushToHeap(const HistogramType* out,
|
65
|
+
const int* cluster_size,
|
66
|
+
int idx1, int idx2,
|
67
|
+
std::vector<HistogramPair>* pairs) {
|
68
|
+
if (idx1 == idx2) {
|
69
|
+
return;
|
70
|
+
}
|
71
|
+
if (idx2 < idx1) {
|
72
|
+
int t = idx2;
|
73
|
+
idx2 = idx1;
|
74
|
+
idx1 = t;
|
75
|
+
}
|
76
|
+
bool store_pair = false;
|
77
|
+
HistogramPair p;
|
78
|
+
p.idx1 = idx1;
|
79
|
+
p.idx2 = idx2;
|
80
|
+
p.valid = true;
|
81
|
+
p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
|
82
|
+
p.cost_diff -= out[idx1].bit_cost_;
|
83
|
+
p.cost_diff -= out[idx2].bit_cost_;
|
84
|
+
|
85
|
+
if (out[idx1].total_count_ == 0) {
|
86
|
+
p.cost_combo = out[idx2].bit_cost_;
|
87
|
+
store_pair = true;
|
88
|
+
} else if (out[idx2].total_count_ == 0) {
|
89
|
+
p.cost_combo = out[idx1].bit_cost_;
|
90
|
+
store_pair = true;
|
91
|
+
} else {
|
92
|
+
double threshold = pairs->empty() ? 1e99 :
|
93
|
+
std::max(0.0, (*pairs)[0].cost_diff);
|
94
|
+
HistogramType combo = out[idx1];
|
95
|
+
combo.AddHistogram(out[idx2]);
|
96
|
+
double cost_combo = PopulationCost(combo);
|
97
|
+
if (cost_combo < threshold - p.cost_diff) {
|
98
|
+
p.cost_combo = cost_combo;
|
99
|
+
store_pair = true;
|
100
|
+
}
|
101
|
+
}
|
102
|
+
if (store_pair) {
|
103
|
+
p.cost_diff += p.cost_combo;
|
104
|
+
pairs->push_back(p);
|
105
|
+
std::push_heap(pairs->begin(), pairs->end(), HistogramPairComparator());
|
106
|
+
}
|
107
|
+
}
|
108
|
+
|
109
|
+
template<typename HistogramType>
|
110
|
+
void HistogramCombine(HistogramType* out,
|
111
|
+
int* cluster_size,
|
112
|
+
int* symbols,
|
113
|
+
int symbols_size,
|
114
|
+
int max_clusters) {
|
115
|
+
double cost_diff_threshold = 0.0;
|
116
|
+
int min_cluster_size = 1;
|
117
|
+
std::set<int> all_symbols;
|
118
|
+
std::vector<int> clusters;
|
119
|
+
for (int i = 0; i < symbols_size; ++i) {
|
120
|
+
if (all_symbols.find(symbols[i]) == all_symbols.end()) {
|
121
|
+
all_symbols.insert(symbols[i]);
|
122
|
+
clusters.push_back(symbols[i]);
|
123
|
+
}
|
124
|
+
}
|
125
|
+
|
126
|
+
// We maintain a heap of histogram pairs, ordered by the bit cost reduction.
|
127
|
+
std::vector<HistogramPair> pairs;
|
128
|
+
for (int idx1 = 0; idx1 < clusters.size(); ++idx1) {
|
129
|
+
for (int idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) {
|
130
|
+
CompareAndPushToHeap(out, cluster_size, clusters[idx1], clusters[idx2],
|
131
|
+
&pairs);
|
132
|
+
}
|
133
|
+
}
|
134
|
+
|
135
|
+
while (clusters.size() > min_cluster_size) {
|
136
|
+
if (pairs[0].cost_diff >= cost_diff_threshold) {
|
137
|
+
cost_diff_threshold = 1e99;
|
138
|
+
min_cluster_size = max_clusters;
|
139
|
+
continue;
|
140
|
+
}
|
141
|
+
// Take the best pair from the top of heap.
|
142
|
+
int best_idx1 = pairs[0].idx1;
|
143
|
+
int best_idx2 = pairs[0].idx2;
|
144
|
+
out[best_idx1].AddHistogram(out[best_idx2]);
|
145
|
+
out[best_idx1].bit_cost_ = pairs[0].cost_combo;
|
146
|
+
cluster_size[best_idx1] += cluster_size[best_idx2];
|
147
|
+
for (int i = 0; i < symbols_size; ++i) {
|
148
|
+
if (symbols[i] == best_idx2) {
|
149
|
+
symbols[i] = best_idx1;
|
150
|
+
}
|
151
|
+
}
|
152
|
+
for (int i = 0; i + 1 < clusters.size(); ++i) {
|
153
|
+
if (clusters[i] >= best_idx2) {
|
154
|
+
clusters[i] = clusters[i + 1];
|
155
|
+
}
|
156
|
+
}
|
157
|
+
clusters.pop_back();
|
158
|
+
// Invalidate pairs intersecting the just combined best pair.
|
159
|
+
for (int i = 0; i < pairs.size(); ++i) {
|
160
|
+
HistogramPair& p = pairs[i];
|
161
|
+
if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
|
162
|
+
p.idx1 == best_idx2 || p.idx2 == best_idx2) {
|
163
|
+
p.valid = false;
|
164
|
+
}
|
165
|
+
}
|
166
|
+
// Pop invalid pairs from the top of the heap.
|
167
|
+
while (!pairs.empty() && !pairs[0].valid) {
|
168
|
+
std::pop_heap(pairs.begin(), pairs.end(), HistogramPairComparator());
|
169
|
+
pairs.pop_back();
|
170
|
+
}
|
171
|
+
// Push new pairs formed with the combined histogram to the heap.
|
172
|
+
for (int i = 0; i < clusters.size(); ++i) {
|
173
|
+
CompareAndPushToHeap(out, cluster_size, best_idx1, clusters[i], &pairs);
|
174
|
+
}
|
175
|
+
}
|
176
|
+
}
|
177
|
+
|
178
|
+
// -----------------------------------------------------------------------------
|
179
|
+
// Histogram refinement
|
180
|
+
|
181
|
+
// What is the bit cost of moving histogram from cur_symbol to candidate.
|
182
|
+
// Returns the extra bit cost of folding `histogram` into `candidate`:
// PopulationCost(histogram + candidate) - candidate.bit_cost_.
// An empty histogram (total_count_ == 0) costs nothing to move.
template<typename HistogramType>
double HistogramBitCostDistance(const HistogramType& histogram,
                                const HistogramType& candidate) {
  if (histogram.total_count_ != 0) {
    HistogramType merged = histogram;
    merged.AddHistogram(candidate);
    return PopulationCost(merged) - candidate.bit_cost_;
  }
  return 0.0;
}
|
192
|
+
|
193
|
+
// Find the best 'out' histogram for each of the 'in' histograms.
|
194
|
+
// Note: we assume that out[]->bit_cost_ is already up-to-date.
|
195
|
+
// Reassigns every input histogram to the cheapest output histogram and then
// rebuilds the output histograms from that assignment.
//
//   in       input histograms
//   in_size  number of entries in `in` and in `symbols`
//   out      output histograms, indexed by symbol value; bit_cost_ is
//            expected to be current on entry
//   symbols  in/out: output histogram chosen for each input
//
// Only output histograms referenced by `symbols` on entry are considered.
template<typename HistogramType>
void HistogramRemap(const HistogramType* in, int in_size,
                    HistogramType* out, int* symbols) {
  typedef std::set<int>::const_iterator SymbolIter;

  // Snapshot the set of output ids in use before any reassignment.
  std::set<int> live_symbols;
  for (int i = 0; i < in_size; ++i) {
    live_symbols.insert(symbols[i]);
  }

  for (int i = 0; i < in_size; ++i) {
    // Start from the previous input's (already updated) choice; for the
    // first input start from its own current symbol.
    int winner = symbols[i == 0 ? 0 : i - 1];
    double winner_bits = HistogramBitCostDistance(in[i], out[winner]);
    for (SymbolIter it = live_symbols.begin(); it != live_symbols.end();
         ++it) {
      const double bits = HistogramBitCostDistance(in[i], out[*it]);
      if (bits < winner_bits) {
        winner_bits = bits;
        winner = *it;
      }
    }
    symbols[i] = winner;
  }

  // Rebuild each output histogram from the inputs now assigned to it.
  for (SymbolIter it = live_symbols.begin(); it != live_symbols.end(); ++it) {
    out[*it].Clear();
  }
  for (int i = 0; i < in_size; ++i) {
    out[symbols[i]].AddHistogram(in[i]);
  }
}
|
226
|
+
|
227
|
+
// Reorder histograms in *out so that the new symbols in *symbols come in
|
228
|
+
// increasing order.
|
229
|
+
// Renumbers the histograms referenced by *symbols so that ids are assigned
// 0, 1, 2, ... in order of first appearance in *symbols.
//
//   out      in/out: on return holds only the referenced histograms, placed
//            at their new ids
//   symbols  in/out: every entry is replaced by its new id
//
// Every value in *symbols must be a valid index into *out on entry.
template<typename HistogramType>
void HistogramReindex(std::vector<HistogramType>* out,
                      std::vector<int>* symbols) {
  // Work from a copy, because *out is overwritten while it is still read.
  const std::vector<HistogramType> tmp(*out);
  std::map<int, int> new_index;
  int next_index = 0;
  for (size_t i = 0; i < symbols->size(); ++i) {
    const int old_index = (*symbols)[i];
    // insert() leaves the map unchanged when old_index was already seen.
    const std::pair<std::map<int, int>::iterator, bool> slot =
        new_index.insert(std::make_pair(old_index, next_index));
    if (slot.second) {
      (*out)[next_index] = tmp[old_index];
      ++next_index;
    }
    (*symbols)[i] = slot.first->second;
  }
  out->resize(next_index);
}
|
247
|
+
|
248
|
+
// Clusters similar histograms in 'in' together, the selected histograms are
|
249
|
+
// placed in 'out', and for each index in 'in', *histogram_symbols will
|
250
|
+
// indicate which of the 'out' histograms is the best approximation.
|
251
|
+
// Clusters similar histograms in 'in' together. The selected histograms are
// placed in 'out', and for each index in 'in', *histogram_symbols indicates
// which of the 'out' histograms is the best approximation.
//
//   in                 input histograms; the first num_contexts * num_blocks
//                      entries are used
//   num_contexts,
//   num_blocks         their product is the number of inputs
//   max_histograms     passed to HistogramCombine as max_clusters
//   out                resulting histograms (overwritten)
//   histogram_symbols  resulting input -> output map (overwritten)
//
// With zero inputs both outputs are left empty.
template<typename HistogramType>
void ClusterHistograms(const std::vector<HistogramType>& in,
                       int num_contexts, int num_blocks,
                       int max_histograms,
                       std::vector<HistogramType>* out,
                       std::vector<int>* histogram_symbols) {
  const int in_size = num_contexts * num_blocks;
  std::vector<int> cluster_size(in_size, 1);
  out->resize(in_size);
  histogram_symbols->resize(in_size);
  if (in_size == 0) {
    // Nothing to cluster. Taking &v[0] of the empty vectors below would be
    // undefined behaviour.
    return;
  }
  for (int i = 0; i < in_size; ++i) {
    (*out)[i] = in[i];
    (*out)[i].bit_cost_ = PopulationCost(in[i]);
    (*histogram_symbols)[i] = i;
  }

  // First combine within windows of at most 64 consecutive inputs. The
  // symbol pointer is offset per window; out and cluster_size stay
  // base-relative because symbol values index them directly.
  const int max_input_histograms = 64;
  for (int i = 0; i < in_size; i += max_input_histograms) {
    int num_to_combine = std::min(in_size - i, max_input_histograms);
    HistogramCombine(&(*out)[0], &cluster_size[0],
                     &(*histogram_symbols)[i], num_to_combine,
                     max_histograms);
  }

  // Collapse similar histograms.
  HistogramCombine(&(*out)[0], &cluster_size[0],
                   &(*histogram_symbols)[0], in_size,
                   max_histograms);

  // Find the optimal map from original histograms to the final ones.
  HistogramRemap(&in[0], in_size, &(*out)[0], &(*histogram_symbols)[0]);

  // Convert the context map to a canonical form.
  HistogramReindex(out, histogram_symbols);
}
|
286
|
+
|
287
|
+
|
288
|
+
} // namespace brotli
|
289
|
+
|
290
|
+
#endif // BROTLI_ENC_CLUSTER_H_
|
@@ -0,0 +1,140 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// This class models a sequence of literals and a backward reference copy.
|
16
|
+
|
17
|
+
#ifndef BROTLI_ENC_COMMAND_H_
|
18
|
+
#define BROTLI_ENC_COMMAND_H_
|
19
|
+
|
20
|
+
#include <stdint.h>
|
21
|
+
#include "./fast_log.h"
|
22
|
+
#include "./prefix.h"
|
23
|
+
|
24
|
+
namespace brotli {
|
25
|
+
|
26
|
+
// Lookup tables indexed by insert / copy length code (24 codes each).
// They are read-only, so they are declared const: a header-defined mutable
// table could be modified by any including translation unit.

// Smallest insert length represented by each insert length code.
static const int insbase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50,
    66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
// Number of extra bits that follow each insert length code.
static const int insextra[] = { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5,
    5, 6, 7, 8, 9, 10, 12, 14, 24 };
// Smallest copy length represented by each copy length code.
static const int copybase[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22,
    30, 38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118 };
// Number of extra bits that follow each copy length code.
static const int copyextra[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4,
    4, 5, 5, 6, 7, 8, 9, 10, 24 };
|
34
|
+
|
35
|
+
static inline int GetInsertLengthCode(int insertlen) {
|
36
|
+
if (insertlen < 6) {
|
37
|
+
return insertlen;
|
38
|
+
} else if (insertlen < 130) {
|
39
|
+
insertlen -= 2;
|
40
|
+
int nbits = Log2FloorNonZero(insertlen) - 1;
|
41
|
+
return (nbits << 1) + (insertlen >> nbits) + 2;
|
42
|
+
} else if (insertlen < 2114) {
|
43
|
+
return Log2FloorNonZero(insertlen - 66) + 10;
|
44
|
+
} else if (insertlen < 6210) {
|
45
|
+
return 21;
|
46
|
+
} else if (insertlen < 22594) {
|
47
|
+
return 22;
|
48
|
+
} else {
|
49
|
+
return 23;
|
50
|
+
}
|
51
|
+
}
|
52
|
+
|
53
|
+
static inline int GetCopyLengthCode(int copylen) {
|
54
|
+
if (copylen < 10) {
|
55
|
+
return copylen - 2;
|
56
|
+
} else if (copylen < 134) {
|
57
|
+
copylen -= 6;
|
58
|
+
int nbits = Log2FloorNonZero(copylen) - 1;
|
59
|
+
return (nbits << 1) + (copylen >> nbits) + 4;
|
60
|
+
} else if (copylen < 2118) {
|
61
|
+
return Log2FloorNonZero(copylen - 70) + 12;
|
62
|
+
} else {
|
63
|
+
return 23;
|
64
|
+
}
|
65
|
+
}
|
66
|
+
|
67
|
+
// Combines an insert length code and a copy length code into a single
// insert-and-copy command code. The low six bits are the low three bits of
// each input code; the bits above them select the code range.
// When distancecode is 0 and the codes are small enough (inscode < 8,
// copycode < 16), the code comes from the first 128 values.
static inline int CombineLengthCodes(
    int inscode, int copycode, int distancecode) {
  const int low_bits = ((inscode & 0x7u) << 3) | (copycode & 0x7u);
  if (distancecode != 0 || inscode >= 8 || copycode >= 16) {
    // "To convert an insert-and-copy length code to an insert length code and
    // a copy length code, the following table can be used"
    static const int cells[9] = { 2, 3, 6, 4, 5, 8, 7, 9, 10 };
    const int cell = cells[(copycode >> 3) + 3 * (inscode >> 3)];
    return (cell << 6) | low_bits;
  }
  return (copycode < 8) ? low_bits : (low_bits | 64);
}
|
79
|
+
|
80
|
+
static inline void GetLengthCode(int insertlen, int copylen, int distancecode,
|
81
|
+
uint16_t* code, uint64_t* extra) {
|
82
|
+
int inscode = GetInsertLengthCode(insertlen);
|
83
|
+
int copycode = GetCopyLengthCode(copylen);
|
84
|
+
uint64_t insnumextra = insextra[inscode];
|
85
|
+
uint64_t numextra = insnumextra + copyextra[copycode];
|
86
|
+
uint64_t insextraval = insertlen - insbase[inscode];
|
87
|
+
uint64_t copyextraval = copylen - copybase[copycode];
|
88
|
+
*code = CombineLengthCodes(inscode, copycode, distancecode);
|
89
|
+
*extra = (numextra << 48) | (copyextraval << insnumextra) | insextraval;
|
90
|
+
}
|
91
|
+
|
92
|
+
struct Command {
|
93
|
+
Command() {}
|
94
|
+
|
95
|
+
// distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1.
|
96
|
+
Command(int insertlen, int copylen, int copylen_code, int distance_code)
|
97
|
+
: insert_len_(insertlen), copy_len_(copylen) {
|
98
|
+
// The distance prefix and extra bits are stored in this Command as if
|
99
|
+
// npostfix and ndirect were 0, they are only recomputed later after the
|
100
|
+
// clustering if needed.
|
101
|
+
PrefixEncodeCopyDistance(distance_code, 0, 0, &dist_prefix_, &dist_extra_);
|
102
|
+
GetLengthCode(insertlen, copylen_code, dist_prefix_,
|
103
|
+
&cmd_prefix_, &cmd_extra_);
|
104
|
+
}
|
105
|
+
|
106
|
+
Command(int insertlen)
|
107
|
+
: insert_len_(insertlen), copy_len_(0), dist_prefix_(16), dist_extra_(0) {
|
108
|
+
GetLengthCode(insertlen, 4, dist_prefix_, &cmd_prefix_, &cmd_extra_);
|
109
|
+
}
|
110
|
+
|
111
|
+
int DistanceCode() const {
|
112
|
+
if (dist_prefix_ < 16) {
|
113
|
+
return dist_prefix_;
|
114
|
+
}
|
115
|
+
int nbits = dist_extra_ >> 24;
|
116
|
+
int extra = dist_extra_ & 0xffffff;
|
117
|
+
int prefix = dist_prefix_ - 12 - 2 * nbits;
|
118
|
+
return (prefix << nbits) + extra + 12;
|
119
|
+
}
|
120
|
+
|
121
|
+
int DistanceContext() const {
|
122
|
+
int r = cmd_prefix_ >> 6;
|
123
|
+
int c = cmd_prefix_ & 7;
|
124
|
+
if ((r == 0 || r == 2 || r == 4 || r == 7) && (c <= 2)) {
|
125
|
+
return c;
|
126
|
+
}
|
127
|
+
return 3;
|
128
|
+
}
|
129
|
+
|
130
|
+
int insert_len_;
|
131
|
+
int copy_len_;
|
132
|
+
uint16_t cmd_prefix_;
|
133
|
+
uint16_t dist_prefix_;
|
134
|
+
uint64_t cmd_extra_;
|
135
|
+
uint32_t dist_extra_;
|
136
|
+
};
|
137
|
+
|
138
|
+
} // namespace brotli
|
139
|
+
|
140
|
+
#endif // BROTLI_ENC_COMMAND_H_
|