brotli 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/brotli/brotli.cc +114 -24
- data/ext/brotli/brotli.h +0 -1
- data/ext/brotli/extconf.rb +30 -23
- data/lib/brotli/version.rb +1 -1
- data/vendor/brotli/LICENSE +1 -1
- data/vendor/brotli/dec/Makefile +1 -1
- data/vendor/brotli/dec/bit_reader.c +3 -3
- data/vendor/brotli/dec/bit_reader.h +25 -27
- data/vendor/brotli/dec/context.h +4 -4
- data/vendor/brotli/dec/decode.c +410 -486
- data/vendor/brotli/dec/decode.h +101 -105
- data/vendor/brotli/dec/dictionary.c +1 -1
- data/vendor/brotli/dec/dictionary.h +7 -8
- data/vendor/brotli/dec/huffman.c +103 -105
- data/vendor/brotli/dec/huffman.h +18 -18
- data/vendor/brotli/dec/port.h +52 -40
- data/vendor/brotli/dec/prefix.h +2 -0
- data/vendor/brotli/dec/state.c +13 -19
- data/vendor/brotli/dec/state.h +25 -39
- data/vendor/brotli/dec/transform.h +38 -44
- data/vendor/brotli/dec/types.h +2 -2
- data/vendor/brotli/enc/Makefile +1 -1
- data/vendor/brotli/enc/backward_references.cc +455 -359
- data/vendor/brotli/enc/backward_references.h +79 -3
- data/vendor/brotli/enc/bit_cost.h +54 -32
- data/vendor/brotli/enc/block_splitter.cc +285 -193
- data/vendor/brotli/enc/block_splitter.h +4 -12
- data/vendor/brotli/enc/brotli_bit_stream.cc +623 -324
- data/vendor/brotli/enc/brotli_bit_stream.h +76 -37
- data/vendor/brotli/enc/cluster.h +161 -120
- data/vendor/brotli/enc/command.h +60 -37
- data/vendor/brotli/enc/compress_fragment.cc +701 -0
- data/vendor/brotli/enc/compress_fragment.h +47 -0
- data/vendor/brotli/enc/compress_fragment_two_pass.cc +524 -0
- data/vendor/brotli/enc/compress_fragment_two_pass.h +40 -0
- data/vendor/brotli/enc/compressor.h +15 -0
- data/vendor/brotli/enc/context.h +1 -1
- data/vendor/brotli/enc/dictionary.h +2 -2
- data/vendor/brotli/enc/encode.cc +819 -286
- data/vendor/brotli/enc/encode.h +38 -15
- data/vendor/brotli/enc/encode_parallel.cc +40 -42
- data/vendor/brotli/enc/entropy_encode.cc +144 -147
- data/vendor/brotli/enc/entropy_encode.h +32 -8
- data/vendor/brotli/enc/entropy_encode_static.h +572 -0
- data/vendor/brotli/enc/fast_log.h +7 -40
- data/vendor/brotli/enc/find_match_length.h +9 -9
- data/vendor/brotli/enc/hash.h +462 -154
- data/vendor/brotli/enc/histogram.cc +6 -6
- data/vendor/brotli/enc/histogram.h +13 -13
- data/vendor/brotli/enc/literal_cost.cc +45 -45
- data/vendor/brotli/enc/metablock.cc +92 -89
- data/vendor/brotli/enc/metablock.h +12 -12
- data/vendor/brotli/enc/port.h +7 -16
- data/vendor/brotli/enc/prefix.h +23 -22
- data/vendor/brotli/enc/ringbuffer.h +75 -29
- data/vendor/brotli/enc/static_dict.cc +56 -48
- data/vendor/brotli/enc/static_dict.h +5 -5
- data/vendor/brotli/enc/streams.cc +1 -1
- data/vendor/brotli/enc/streams.h +5 -5
- data/vendor/brotli/enc/transform.h +40 -35
- data/vendor/brotli/enc/types.h +2 -0
- data/vendor/brotli/enc/utf8_util.cc +3 -2
- data/vendor/brotli/enc/write_bits.h +6 -6
- metadata +9 -5
- data/vendor/brotli/dec/streams.c +0 -102
- data/vendor/brotli/dec/streams.h +0 -95
@@ -23,17 +23,93 @@ namespace brotli {
|
|
23
23
|
// by this call.
|
24
24
|
void CreateBackwardReferences(size_t num_bytes,
|
25
25
|
size_t position,
|
26
|
+
bool is_last,
|
26
27
|
const uint8_t* ringbuffer,
|
27
28
|
size_t ringbuffer_mask,
|
28
|
-
const size_t max_backward_limit,
|
29
29
|
const int quality,
|
30
|
+
const int lgwin,
|
30
31
|
Hashers* hashers,
|
31
32
|
int hash_type,
|
32
33
|
int* dist_cache,
|
33
|
-
|
34
|
+
size_t* last_insert_len,
|
34
35
|
Command* commands,
|
35
36
|
size_t* num_commands,
|
36
|
-
|
37
|
+
size_t* num_literals);
|
38
|
+
|
39
|
+
static const float kInfinity = std::numeric_limits<float>::infinity();
|
40
|
+
|
41
|
+
struct ZopfliNode {
|
42
|
+
ZopfliNode(void) : length(1),
|
43
|
+
distance(0),
|
44
|
+
insert_length(0),
|
45
|
+
cost(kInfinity) {}
|
46
|
+
|
47
|
+
inline uint32_t copy_length() const {
|
48
|
+
return length & 0xffffff;
|
49
|
+
}
|
50
|
+
|
51
|
+
inline uint32_t length_code() const {
|
52
|
+
const uint32_t modifier = length >> 24;
|
53
|
+
return copy_length() + 9u - modifier;
|
54
|
+
}
|
55
|
+
|
56
|
+
inline uint32_t copy_distance() const {
|
57
|
+
return distance & 0x1ffffff;
|
58
|
+
}
|
59
|
+
|
60
|
+
inline uint32_t distance_code() const {
|
61
|
+
const uint32_t short_code = distance >> 25;
|
62
|
+
return short_code == 0 ? copy_distance() + 15 : short_code - 1;
|
63
|
+
}
|
64
|
+
|
65
|
+
inline uint32_t command_length() const {
|
66
|
+
return copy_length() + insert_length;
|
67
|
+
}
|
68
|
+
|
69
|
+
// best length to get up to this byte (not including this byte itself)
|
70
|
+
// highest 8 bit is used to reconstruct the length code
|
71
|
+
uint32_t length;
|
72
|
+
// distance associated with the length
|
73
|
+
// highest 7 bit contains distance short code + 1 (or zero if no short code)
|
74
|
+
uint32_t distance;
|
75
|
+
// number of literal inserts before this copy
|
76
|
+
uint32_t insert_length;
|
77
|
+
// smallest cost to get to this byte from the beginning, as found so far
|
78
|
+
float cost;
|
79
|
+
};
|
80
|
+
|
81
|
+
// Computes the shortest path of commands from position to at most
|
82
|
+
// position + num_bytes.
|
83
|
+
//
|
84
|
+
// On return, path->size() is the number of commands found and path[i] is the
|
85
|
+
// length of the ith command (copy length plus insert length).
|
86
|
+
// Note that the sum of the lengths of all commands can be less than num_bytes.
|
87
|
+
//
|
88
|
+
// On return, the nodes[0..num_bytes] array will have the following
|
89
|
+
// "ZopfliNode array invariant":
|
90
|
+
// For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
|
91
|
+
// (1) nodes[i].copy_length() >= 2
|
92
|
+
// (2) nodes[i].command_length() <= i and
|
93
|
+
// (3) nodes[i - nodes[i].command_length()].cost < kInfinity
|
94
|
+
void ZopfliComputeShortestPath(size_t num_bytes,
|
95
|
+
size_t position,
|
96
|
+
const uint8_t* ringbuffer,
|
97
|
+
size_t ringbuffer_mask,
|
98
|
+
const size_t max_backward_limit,
|
99
|
+
const int* dist_cache,
|
100
|
+
Hashers::H10* hasher,
|
101
|
+
ZopfliNode* nodes,
|
102
|
+
std::vector<uint32_t>* path);
|
103
|
+
|
104
|
+
void ZopfliCreateCommands(const size_t num_bytes,
|
105
|
+
const size_t block_start,
|
106
|
+
const size_t max_backward_limit,
|
107
|
+
const std::vector<uint32_t>& path,
|
108
|
+
const ZopfliNode* nodes,
|
109
|
+
int* dist_cache,
|
110
|
+
size_t* last_insert_len,
|
111
|
+
Command* commands,
|
112
|
+
size_t* num_literals);
|
37
113
|
|
38
114
|
} // namespace brotli
|
39
115
|
|
@@ -9,89 +9,111 @@
|
|
9
9
|
#ifndef BROTLI_ENC_BIT_COST_H_
|
10
10
|
#define BROTLI_ENC_BIT_COST_H_
|
11
11
|
|
12
|
-
|
13
|
-
|
14
12
|
#include "./entropy_encode.h"
|
15
13
|
#include "./fast_log.h"
|
16
14
|
#include "./types.h"
|
17
15
|
|
18
16
|
namespace brotli {
|
19
17
|
|
20
|
-
static inline double ShannonEntropy(const
|
21
|
-
|
22
|
-
|
18
|
+
static inline double ShannonEntropy(const uint32_t *population, size_t size,
|
19
|
+
size_t *total) {
|
20
|
+
size_t sum = 0;
|
23
21
|
double retval = 0;
|
24
|
-
const
|
25
|
-
|
22
|
+
const uint32_t *population_end = population + size;
|
23
|
+
size_t p;
|
26
24
|
if (size & 1) {
|
27
25
|
goto odd_number_of_elements_left;
|
28
26
|
}
|
29
27
|
while (population < population_end) {
|
30
28
|
p = *population++;
|
31
29
|
sum += p;
|
32
|
-
retval -= p * FastLog2(p);
|
30
|
+
retval -= static_cast<double>(p) * FastLog2(p);
|
33
31
|
odd_number_of_elements_left:
|
34
32
|
p = *population++;
|
35
33
|
sum += p;
|
36
|
-
retval -= p * FastLog2(p);
|
34
|
+
retval -= static_cast<double>(p) * FastLog2(p);
|
37
35
|
}
|
38
|
-
if (sum) retval += sum * FastLog2(sum);
|
36
|
+
if (sum) retval += static_cast<double>(sum) * FastLog2(sum);
|
39
37
|
*total = sum;
|
40
38
|
return retval;
|
41
39
|
}
|
42
40
|
|
43
|
-
static inline double BitsEntropy(const
|
44
|
-
|
41
|
+
static inline double BitsEntropy(const uint32_t *population, size_t size) {
|
42
|
+
size_t sum;
|
45
43
|
double retval = ShannonEntropy(population, size, &sum);
|
46
44
|
if (retval < sum) {
|
47
45
|
// At least one bit per literal is needed.
|
48
|
-
retval = sum;
|
46
|
+
retval = static_cast<double>(sum);
|
49
47
|
}
|
50
48
|
return retval;
|
51
49
|
}
|
52
50
|
|
53
|
-
|
54
51
|
template<int kSize>
|
55
52
|
double PopulationCost(const Histogram<kSize>& histogram) {
|
53
|
+
static const double kOneSymbolHistogramCost = 12;
|
54
|
+
static const double kTwoSymbolHistogramCost = 20;
|
55
|
+
static const double kThreeSymbolHistogramCost = 28;
|
56
|
+
static const double kFourSymbolHistogramCost = 37;
|
56
57
|
if (histogram.total_count_ == 0) {
|
57
|
-
return
|
58
|
+
return kOneSymbolHistogramCost;
|
58
59
|
}
|
59
60
|
int count = 0;
|
61
|
+
int s[5];
|
60
62
|
for (int i = 0; i < kSize; ++i) {
|
61
63
|
if (histogram.data_[i] > 0) {
|
64
|
+
s[count] = i;
|
62
65
|
++count;
|
66
|
+
if (count > 4) break;
|
63
67
|
}
|
64
68
|
}
|
65
69
|
if (count == 1) {
|
66
|
-
return
|
70
|
+
return kOneSymbolHistogramCost;
|
67
71
|
}
|
68
72
|
if (count == 2) {
|
69
|
-
return
|
73
|
+
return (kTwoSymbolHistogramCost +
|
74
|
+
static_cast<double>(histogram.total_count_));
|
70
75
|
}
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
76
|
+
if (count == 3) {
|
77
|
+
const uint32_t histo0 = histogram.data_[s[0]];
|
78
|
+
const uint32_t histo1 = histogram.data_[s[1]];
|
79
|
+
const uint32_t histo2 = histogram.data_[s[2]];
|
80
|
+
const uint32_t histomax = std::max(histo0, std::max(histo1, histo2));
|
81
|
+
return (kThreeSymbolHistogramCost +
|
82
|
+
2 * (histo0 + histo1 + histo2) - histomax);
|
83
|
+
}
|
84
|
+
if (count == 4) {
|
85
|
+
uint32_t histo[4];
|
86
|
+
for (int i = 0; i < 4; ++i) {
|
87
|
+
histo[i] = histogram.data_[s[i]];
|
78
88
|
}
|
79
|
-
|
89
|
+
// Sort
|
90
|
+
for (int i = 0; i < 4; ++i) {
|
91
|
+
for (int j = i + 1; j < 4; ++j) {
|
92
|
+
if (histo[j] > histo[i]) {
|
93
|
+
std::swap(histo[j], histo[i]);
|
94
|
+
}
|
95
|
+
}
|
96
|
+
}
|
97
|
+
const uint32_t h23 = histo[2] + histo[3];
|
98
|
+
const uint32_t histomax = std::max(h23, histo[0]);
|
99
|
+
return (kFourSymbolHistogramCost +
|
100
|
+
3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
|
80
101
|
}
|
81
102
|
|
82
103
|
// In this loop we compute the entropy of the histogram and simultaneously
|
83
104
|
// build a simplified histogram of the code length codes where we use the
|
84
105
|
// zero repeat code 17, but we don't use the non-zero repeat code 16.
|
85
|
-
|
86
|
-
|
106
|
+
double bits = 0;
|
107
|
+
size_t max_depth = 1;
|
108
|
+
uint32_t depth_histo[kCodeLengthCodes] = { 0 };
|
87
109
|
const double log2total = FastLog2(histogram.total_count_);
|
88
|
-
for (
|
110
|
+
for (size_t i = 0; i < kSize;) {
|
89
111
|
if (histogram.data_[i] > 0) {
|
90
112
|
// Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
|
91
113
|
// = log2(total_count) - log2(count(symbol))
|
92
114
|
double log2p = log2total - FastLog2(histogram.data_[i]);
|
93
115
|
// Approximate the bit depth by round(-log2(P(symbol)))
|
94
|
-
|
116
|
+
size_t depth = static_cast<size_t>(log2p + 0.5);
|
95
117
|
bits += histogram.data_[i] * log2p;
|
96
118
|
if (depth > 15) {
|
97
119
|
depth = 15;
|
@@ -104,8 +126,8 @@ double PopulationCost(const Histogram<kSize>& histogram) {
|
|
104
126
|
} else {
|
105
127
|
// Compute the run length of zeros and add the appropriate number of 0 and
|
106
128
|
// 17 code length codes to the code length code histogram.
|
107
|
-
|
108
|
-
for (
|
129
|
+
uint32_t reps = 1;
|
130
|
+
for (size_t k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
|
109
131
|
++reps;
|
110
132
|
}
|
111
133
|
i += reps;
|
@@ -128,7 +150,7 @@ double PopulationCost(const Histogram<kSize>& histogram) {
|
|
128
150
|
}
|
129
151
|
}
|
130
152
|
// Add the estimated encoding cost of the code length code histogram.
|
131
|
-
bits += 18 + 2 * max_depth;
|
153
|
+
bits += static_cast<double>(18 + 2 * max_depth);
|
132
154
|
// Add the entropy of the code length code histogram.
|
133
155
|
bits += BitsEntropy(depth_histo, kCodeLengthCodes);
|
134
156
|
return bits;
|