brotli 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/brotli/brotli.cc +114 -24
- data/ext/brotli/brotli.h +0 -1
- data/ext/brotli/extconf.rb +30 -23
- data/lib/brotli/version.rb +1 -1
- data/vendor/brotli/LICENSE +1 -1
- data/vendor/brotli/dec/Makefile +1 -1
- data/vendor/brotli/dec/bit_reader.c +3 -3
- data/vendor/brotli/dec/bit_reader.h +25 -27
- data/vendor/brotli/dec/context.h +4 -4
- data/vendor/brotli/dec/decode.c +410 -486
- data/vendor/brotli/dec/decode.h +101 -105
- data/vendor/brotli/dec/dictionary.c +1 -1
- data/vendor/brotli/dec/dictionary.h +7 -8
- data/vendor/brotli/dec/huffman.c +103 -105
- data/vendor/brotli/dec/huffman.h +18 -18
- data/vendor/brotli/dec/port.h +52 -40
- data/vendor/brotli/dec/prefix.h +2 -0
- data/vendor/brotli/dec/state.c +13 -19
- data/vendor/brotli/dec/state.h +25 -39
- data/vendor/brotli/dec/transform.h +38 -44
- data/vendor/brotli/dec/types.h +2 -2
- data/vendor/brotli/enc/Makefile +1 -1
- data/vendor/brotli/enc/backward_references.cc +455 -359
- data/vendor/brotli/enc/backward_references.h +79 -3
- data/vendor/brotli/enc/bit_cost.h +54 -32
- data/vendor/brotli/enc/block_splitter.cc +285 -193
- data/vendor/brotli/enc/block_splitter.h +4 -12
- data/vendor/brotli/enc/brotli_bit_stream.cc +623 -324
- data/vendor/brotli/enc/brotli_bit_stream.h +76 -37
- data/vendor/brotli/enc/cluster.h +161 -120
- data/vendor/brotli/enc/command.h +60 -37
- data/vendor/brotli/enc/compress_fragment.cc +701 -0
- data/vendor/brotli/enc/compress_fragment.h +47 -0
- data/vendor/brotli/enc/compress_fragment_two_pass.cc +524 -0
- data/vendor/brotli/enc/compress_fragment_two_pass.h +40 -0
- data/vendor/brotli/enc/compressor.h +15 -0
- data/vendor/brotli/enc/context.h +1 -1
- data/vendor/brotli/enc/dictionary.h +2 -2
- data/vendor/brotli/enc/encode.cc +819 -286
- data/vendor/brotli/enc/encode.h +38 -15
- data/vendor/brotli/enc/encode_parallel.cc +40 -42
- data/vendor/brotli/enc/entropy_encode.cc +144 -147
- data/vendor/brotli/enc/entropy_encode.h +32 -8
- data/vendor/brotli/enc/entropy_encode_static.h +572 -0
- data/vendor/brotli/enc/fast_log.h +7 -40
- data/vendor/brotli/enc/find_match_length.h +9 -9
- data/vendor/brotli/enc/hash.h +462 -154
- data/vendor/brotli/enc/histogram.cc +6 -6
- data/vendor/brotli/enc/histogram.h +13 -13
- data/vendor/brotli/enc/literal_cost.cc +45 -45
- data/vendor/brotli/enc/metablock.cc +92 -89
- data/vendor/brotli/enc/metablock.h +12 -12
- data/vendor/brotli/enc/port.h +7 -16
- data/vendor/brotli/enc/prefix.h +23 -22
- data/vendor/brotli/enc/ringbuffer.h +75 -29
- data/vendor/brotli/enc/static_dict.cc +56 -48
- data/vendor/brotli/enc/static_dict.h +5 -5
- data/vendor/brotli/enc/streams.cc +1 -1
- data/vendor/brotli/enc/streams.h +5 -5
- data/vendor/brotli/enc/transform.h +40 -35
- data/vendor/brotli/enc/types.h +2 -0
- data/vendor/brotli/enc/utf8_util.cc +3 -2
- data/vendor/brotli/enc/write_bits.h +6 -6
- metadata +9 -5
- data/vendor/brotli/dec/streams.c +0 -102
- data/vendor/brotli/dec/streams.h +0 -95
@@ -23,17 +23,93 @@ namespace brotli {
|
|
23
23
|
// by this call.
|
24
24
|
void CreateBackwardReferences(size_t num_bytes,
|
25
25
|
size_t position,
|
26
|
+
bool is_last,
|
26
27
|
const uint8_t* ringbuffer,
|
27
28
|
size_t ringbuffer_mask,
|
28
|
-
const size_t max_backward_limit,
|
29
29
|
const int quality,
|
30
|
+
const int lgwin,
|
30
31
|
Hashers* hashers,
|
31
32
|
int hash_type,
|
32
33
|
int* dist_cache,
|
33
|
-
|
34
|
+
size_t* last_insert_len,
|
34
35
|
Command* commands,
|
35
36
|
size_t* num_commands,
|
36
|
-
|
37
|
+
size_t* num_literals);
|
38
|
+
|
39
|
+
static const float kInfinity = std::numeric_limits<float>::infinity();
|
40
|
+
|
41
|
+
struct ZopfliNode {
|
42
|
+
ZopfliNode(void) : length(1),
|
43
|
+
distance(0),
|
44
|
+
insert_length(0),
|
45
|
+
cost(kInfinity) {}
|
46
|
+
|
47
|
+
inline uint32_t copy_length() const {
|
48
|
+
return length & 0xffffff;
|
49
|
+
}
|
50
|
+
|
51
|
+
inline uint32_t length_code() const {
|
52
|
+
const uint32_t modifier = length >> 24;
|
53
|
+
return copy_length() + 9u - modifier;
|
54
|
+
}
|
55
|
+
|
56
|
+
inline uint32_t copy_distance() const {
|
57
|
+
return distance & 0x1ffffff;
|
58
|
+
}
|
59
|
+
|
60
|
+
inline uint32_t distance_code() const {
|
61
|
+
const uint32_t short_code = distance >> 25;
|
62
|
+
return short_code == 0 ? copy_distance() + 15 : short_code - 1;
|
63
|
+
}
|
64
|
+
|
65
|
+
inline uint32_t command_length() const {
|
66
|
+
return copy_length() + insert_length;
|
67
|
+
}
|
68
|
+
|
69
|
+
// best length to get up to this byte (not including this byte itself)
|
70
|
+
// highest 8 bit is used to reconstruct the length code
|
71
|
+
uint32_t length;
|
72
|
+
// distance associated with the length
|
73
|
+
// highest 7 bit contains distance short code + 1 (or zero if no short code)
|
74
|
+
uint32_t distance;
|
75
|
+
// number of literal inserts before this copy
|
76
|
+
uint32_t insert_length;
|
77
|
+
// smallest cost to get to this byte from the beginning, as found so far
|
78
|
+
float cost;
|
79
|
+
};
|
80
|
+
|
81
|
+
// Computes the shortest path of commands from position to at most
|
82
|
+
// position + num_bytes.
|
83
|
+
//
|
84
|
+
// On return, path->size() is the number of commands found and path[i] is the
|
85
|
+
// length of the ith command (copy length plus insert length).
|
86
|
+
// Note that the sum of the lengths of all commands can be less than num_bytes.
|
87
|
+
//
|
88
|
+
// On return, the nodes[0..num_bytes] array will have the following
|
89
|
+
// "ZopfliNode array invariant":
|
90
|
+
// For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
|
91
|
+
// (1) nodes[i].copy_length() >= 2
|
92
|
+
// (2) nodes[i].command_length() <= i and
|
93
|
+
// (3) nodes[i - nodes[i].command_length()].cost < kInfinity
|
94
|
+
void ZopfliComputeShortestPath(size_t num_bytes,
|
95
|
+
size_t position,
|
96
|
+
const uint8_t* ringbuffer,
|
97
|
+
size_t ringbuffer_mask,
|
98
|
+
const size_t max_backward_limit,
|
99
|
+
const int* dist_cache,
|
100
|
+
Hashers::H10* hasher,
|
101
|
+
ZopfliNode* nodes,
|
102
|
+
std::vector<uint32_t>* path);
|
103
|
+
|
104
|
+
void ZopfliCreateCommands(const size_t num_bytes,
|
105
|
+
const size_t block_start,
|
106
|
+
const size_t max_backward_limit,
|
107
|
+
const std::vector<uint32_t>& path,
|
108
|
+
const ZopfliNode* nodes,
|
109
|
+
int* dist_cache,
|
110
|
+
size_t* last_insert_len,
|
111
|
+
Command* commands,
|
112
|
+
size_t* num_literals);
|
37
113
|
|
38
114
|
} // namespace brotli
|
39
115
|
|
@@ -9,89 +9,111 @@
|
|
9
9
|
#ifndef BROTLI_ENC_BIT_COST_H_
|
10
10
|
#define BROTLI_ENC_BIT_COST_H_
|
11
11
|
|
12
|
-
|
13
|
-
|
14
12
|
#include "./entropy_encode.h"
|
15
13
|
#include "./fast_log.h"
|
16
14
|
#include "./types.h"
|
17
15
|
|
18
16
|
namespace brotli {
|
19
17
|
|
20
|
-
static inline double ShannonEntropy(const
|
21
|
-
|
22
|
-
|
18
|
+
static inline double ShannonEntropy(const uint32_t *population, size_t size,
|
19
|
+
size_t *total) {
|
20
|
+
size_t sum = 0;
|
23
21
|
double retval = 0;
|
24
|
-
const
|
25
|
-
|
22
|
+
const uint32_t *population_end = population + size;
|
23
|
+
size_t p;
|
26
24
|
if (size & 1) {
|
27
25
|
goto odd_number_of_elements_left;
|
28
26
|
}
|
29
27
|
while (population < population_end) {
|
30
28
|
p = *population++;
|
31
29
|
sum += p;
|
32
|
-
retval -= p * FastLog2(p);
|
30
|
+
retval -= static_cast<double>(p) * FastLog2(p);
|
33
31
|
odd_number_of_elements_left:
|
34
32
|
p = *population++;
|
35
33
|
sum += p;
|
36
|
-
retval -= p * FastLog2(p);
|
34
|
+
retval -= static_cast<double>(p) * FastLog2(p);
|
37
35
|
}
|
38
|
-
if (sum) retval += sum * FastLog2(sum);
|
36
|
+
if (sum) retval += static_cast<double>(sum) * FastLog2(sum);
|
39
37
|
*total = sum;
|
40
38
|
return retval;
|
41
39
|
}
|
42
40
|
|
43
|
-
static inline double BitsEntropy(const
|
44
|
-
|
41
|
+
static inline double BitsEntropy(const uint32_t *population, size_t size) {
|
42
|
+
size_t sum;
|
45
43
|
double retval = ShannonEntropy(population, size, &sum);
|
46
44
|
if (retval < sum) {
|
47
45
|
// At least one bit per literal is needed.
|
48
|
-
retval = sum;
|
46
|
+
retval = static_cast<double>(sum);
|
49
47
|
}
|
50
48
|
return retval;
|
51
49
|
}
|
52
50
|
|
53
|
-
|
54
51
|
template<int kSize>
|
55
52
|
double PopulationCost(const Histogram<kSize>& histogram) {
|
53
|
+
static const double kOneSymbolHistogramCost = 12;
|
54
|
+
static const double kTwoSymbolHistogramCost = 20;
|
55
|
+
static const double kThreeSymbolHistogramCost = 28;
|
56
|
+
static const double kFourSymbolHistogramCost = 37;
|
56
57
|
if (histogram.total_count_ == 0) {
|
57
|
-
return
|
58
|
+
return kOneSymbolHistogramCost;
|
58
59
|
}
|
59
60
|
int count = 0;
|
61
|
+
int s[5];
|
60
62
|
for (int i = 0; i < kSize; ++i) {
|
61
63
|
if (histogram.data_[i] > 0) {
|
64
|
+
s[count] = i;
|
62
65
|
++count;
|
66
|
+
if (count > 4) break;
|
63
67
|
}
|
64
68
|
}
|
65
69
|
if (count == 1) {
|
66
|
-
return
|
70
|
+
return kOneSymbolHistogramCost;
|
67
71
|
}
|
68
72
|
if (count == 2) {
|
69
|
-
return
|
73
|
+
return (kTwoSymbolHistogramCost +
|
74
|
+
static_cast<double>(histogram.total_count_));
|
70
75
|
}
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
76
|
+
if (count == 3) {
|
77
|
+
const uint32_t histo0 = histogram.data_[s[0]];
|
78
|
+
const uint32_t histo1 = histogram.data_[s[1]];
|
79
|
+
const uint32_t histo2 = histogram.data_[s[2]];
|
80
|
+
const uint32_t histomax = std::max(histo0, std::max(histo1, histo2));
|
81
|
+
return (kThreeSymbolHistogramCost +
|
82
|
+
2 * (histo0 + histo1 + histo2) - histomax);
|
83
|
+
}
|
84
|
+
if (count == 4) {
|
85
|
+
uint32_t histo[4];
|
86
|
+
for (int i = 0; i < 4; ++i) {
|
87
|
+
histo[i] = histogram.data_[s[i]];
|
78
88
|
}
|
79
|
-
|
89
|
+
// Sort
|
90
|
+
for (int i = 0; i < 4; ++i) {
|
91
|
+
for (int j = i + 1; j < 4; ++j) {
|
92
|
+
if (histo[j] > histo[i]) {
|
93
|
+
std::swap(histo[j], histo[i]);
|
94
|
+
}
|
95
|
+
}
|
96
|
+
}
|
97
|
+
const uint32_t h23 = histo[2] + histo[3];
|
98
|
+
const uint32_t histomax = std::max(h23, histo[0]);
|
99
|
+
return (kFourSymbolHistogramCost +
|
100
|
+
3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
|
80
101
|
}
|
81
102
|
|
82
103
|
// In this loop we compute the entropy of the histogram and simultaneously
|
83
104
|
// build a simplified histogram of the code length codes where we use the
|
84
105
|
// zero repeat code 17, but we don't use the non-zero repeat code 16.
|
85
|
-
|
86
|
-
|
106
|
+
double bits = 0;
|
107
|
+
size_t max_depth = 1;
|
108
|
+
uint32_t depth_histo[kCodeLengthCodes] = { 0 };
|
87
109
|
const double log2total = FastLog2(histogram.total_count_);
|
88
|
-
for (
|
110
|
+
for (size_t i = 0; i < kSize;) {
|
89
111
|
if (histogram.data_[i] > 0) {
|
90
112
|
// Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
|
91
113
|
// = log2(total_count) - log2(count(symbol))
|
92
114
|
double log2p = log2total - FastLog2(histogram.data_[i]);
|
93
115
|
// Approximate the bit depth by round(-log2(P(symbol)))
|
94
|
-
|
116
|
+
size_t depth = static_cast<size_t>(log2p + 0.5);
|
95
117
|
bits += histogram.data_[i] * log2p;
|
96
118
|
if (depth > 15) {
|
97
119
|
depth = 15;
|
@@ -104,8 +126,8 @@ double PopulationCost(const Histogram<kSize>& histogram) {
|
|
104
126
|
} else {
|
105
127
|
// Compute the run length of zeros and add the appropriate number of 0 and
|
106
128
|
// 17 code length codes to the code length code histogram.
|
107
|
-
|
108
|
-
for (
|
129
|
+
uint32_t reps = 1;
|
130
|
+
for (size_t k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
|
109
131
|
++reps;
|
110
132
|
}
|
111
133
|
i += reps;
|
@@ -128,7 +150,7 @@ double PopulationCost(const Histogram<kSize>& histogram) {
|
|
128
150
|
}
|
129
151
|
}
|
130
152
|
// Add the estimated encoding cost of the code length code histogram.
|
131
|
-
bits += 18 + 2 * max_depth;
|
153
|
+
bits += static_cast<double>(18 + 2 * max_depth);
|
132
154
|
// Add the entropy of the code length code histogram.
|
133
155
|
bits += BitsEntropy(depth_histo, kCodeLengthCodes);
|
134
156
|
return bits;
|