brotli 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/brotli/brotli.cc +114 -24
- data/ext/brotli/brotli.h +0 -1
- data/ext/brotli/extconf.rb +30 -23
- data/lib/brotli/version.rb +1 -1
- data/vendor/brotli/LICENSE +1 -1
- data/vendor/brotli/dec/Makefile +1 -1
- data/vendor/brotli/dec/bit_reader.c +3 -3
- data/vendor/brotli/dec/bit_reader.h +25 -27
- data/vendor/brotli/dec/context.h +4 -4
- data/vendor/brotli/dec/decode.c +410 -486
- data/vendor/brotli/dec/decode.h +101 -105
- data/vendor/brotli/dec/dictionary.c +1 -1
- data/vendor/brotli/dec/dictionary.h +7 -8
- data/vendor/brotli/dec/huffman.c +103 -105
- data/vendor/brotli/dec/huffman.h +18 -18
- data/vendor/brotli/dec/port.h +52 -40
- data/vendor/brotli/dec/prefix.h +2 -0
- data/vendor/brotli/dec/state.c +13 -19
- data/vendor/brotli/dec/state.h +25 -39
- data/vendor/brotli/dec/transform.h +38 -44
- data/vendor/brotli/dec/types.h +2 -2
- data/vendor/brotli/enc/Makefile +1 -1
- data/vendor/brotli/enc/backward_references.cc +455 -359
- data/vendor/brotli/enc/backward_references.h +79 -3
- data/vendor/brotli/enc/bit_cost.h +54 -32
- data/vendor/brotli/enc/block_splitter.cc +285 -193
- data/vendor/brotli/enc/block_splitter.h +4 -12
- data/vendor/brotli/enc/brotli_bit_stream.cc +623 -324
- data/vendor/brotli/enc/brotli_bit_stream.h +76 -37
- data/vendor/brotli/enc/cluster.h +161 -120
- data/vendor/brotli/enc/command.h +60 -37
- data/vendor/brotli/enc/compress_fragment.cc +701 -0
- data/vendor/brotli/enc/compress_fragment.h +47 -0
- data/vendor/brotli/enc/compress_fragment_two_pass.cc +524 -0
- data/vendor/brotli/enc/compress_fragment_two_pass.h +40 -0
- data/vendor/brotli/enc/compressor.h +15 -0
- data/vendor/brotli/enc/context.h +1 -1
- data/vendor/brotli/enc/dictionary.h +2 -2
- data/vendor/brotli/enc/encode.cc +819 -286
- data/vendor/brotli/enc/encode.h +38 -15
- data/vendor/brotli/enc/encode_parallel.cc +40 -42
- data/vendor/brotli/enc/entropy_encode.cc +144 -147
- data/vendor/brotli/enc/entropy_encode.h +32 -8
- data/vendor/brotli/enc/entropy_encode_static.h +572 -0
- data/vendor/brotli/enc/fast_log.h +7 -40
- data/vendor/brotli/enc/find_match_length.h +9 -9
- data/vendor/brotli/enc/hash.h +462 -154
- data/vendor/brotli/enc/histogram.cc +6 -6
- data/vendor/brotli/enc/histogram.h +13 -13
- data/vendor/brotli/enc/literal_cost.cc +45 -45
- data/vendor/brotli/enc/metablock.cc +92 -89
- data/vendor/brotli/enc/metablock.h +12 -12
- data/vendor/brotli/enc/port.h +7 -16
- data/vendor/brotli/enc/prefix.h +23 -22
- data/vendor/brotli/enc/ringbuffer.h +75 -29
- data/vendor/brotli/enc/static_dict.cc +56 -48
- data/vendor/brotli/enc/static_dict.h +5 -5
- data/vendor/brotli/enc/streams.cc +1 -1
- data/vendor/brotli/enc/streams.h +5 -5
- data/vendor/brotli/enc/transform.h +40 -35
- data/vendor/brotli/enc/types.h +2 -0
- data/vendor/brotli/enc/utf8_util.cc +3 -2
- data/vendor/brotli/enc/write_bits.h +6 -6
- metadata +9 -5
- data/vendor/brotli/dec/streams.c +0 -102
- data/vendor/brotli/dec/streams.h +0 -95
@@ -28,7 +28,7 @@ void BuildHistograms(
|
|
28
28
|
size_t mask,
|
29
29
|
uint8_t prev_byte,
|
30
30
|
uint8_t prev_byte2,
|
31
|
-
const std::vector<
|
31
|
+
const std::vector<ContextType>& context_modes,
|
32
32
|
std::vector<HistogramLiteral>* literal_histograms,
|
33
33
|
std::vector<HistogramCommand>* insert_and_copy_histograms,
|
34
34
|
std::vector<HistogramDistance>* copy_dist_histograms) {
|
@@ -41,22 +41,22 @@ void BuildHistograms(
|
|
41
41
|
insert_and_copy_it.Next();
|
42
42
|
(*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
|
43
43
|
cmd.cmd_prefix_);
|
44
|
-
for (
|
44
|
+
for (size_t j = cmd.insert_len_; j != 0; --j) {
|
45
45
|
literal_it.Next();
|
46
|
-
|
46
|
+
size_t context = (literal_it.type_ << kLiteralContextBits) +
|
47
47
|
Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
|
48
48
|
(*literal_histograms)[context].Add(ringbuffer[pos & mask]);
|
49
49
|
prev_byte2 = prev_byte;
|
50
50
|
prev_byte = ringbuffer[pos & mask];
|
51
51
|
++pos;
|
52
52
|
}
|
53
|
-
pos += cmd.
|
54
|
-
if (cmd.
|
53
|
+
pos += cmd.copy_len();
|
54
|
+
if (cmd.copy_len()) {
|
55
55
|
prev_byte2 = ringbuffer[(pos - 2) & mask];
|
56
56
|
prev_byte = ringbuffer[(pos - 1) & mask];
|
57
57
|
if (cmd.cmd_prefix_ >= 128) {
|
58
58
|
dist_it.Next();
|
59
|
-
|
59
|
+
size_t context = (dist_it.type_ << kDistanceContextBits) +
|
60
60
|
cmd.DistanceContext();
|
61
61
|
(*copy_dist_histograms)[context].Add(cmd.dist_prefix_);
|
62
62
|
}
|
@@ -9,10 +9,10 @@
|
|
9
9
|
#ifndef BROTLI_ENC_HISTOGRAM_H_
|
10
10
|
#define BROTLI_ENC_HISTOGRAM_H_
|
11
11
|
|
12
|
-
#include <
|
12
|
+
#include <cstring>
|
13
13
|
#include <limits>
|
14
14
|
#include <vector>
|
15
|
-
#include
|
15
|
+
#include "./context.h"
|
16
16
|
#include "./command.h"
|
17
17
|
#include "./fast_log.h"
|
18
18
|
#include "./prefix.h"
|
@@ -25,37 +25,37 @@ struct BlockSplit;
|
|
25
25
|
// A simple container for histograms of data in blocks.
|
26
26
|
template<int kDataSize>
|
27
27
|
struct Histogram {
|
28
|
-
Histogram() {
|
28
|
+
Histogram(void) {
|
29
29
|
Clear();
|
30
30
|
}
|
31
|
-
void Clear() {
|
31
|
+
void Clear(void) {
|
32
32
|
memset(data_, 0, sizeof(data_));
|
33
33
|
total_count_ = 0;
|
34
34
|
bit_cost_ = std::numeric_limits<double>::infinity();
|
35
35
|
}
|
36
|
-
void Add(
|
36
|
+
void Add(size_t val) {
|
37
37
|
++data_[val];
|
38
38
|
++total_count_;
|
39
39
|
}
|
40
|
-
void Remove(
|
40
|
+
void Remove(size_t val) {
|
41
41
|
--data_[val];
|
42
42
|
--total_count_;
|
43
43
|
}
|
44
44
|
template<typename DataType>
|
45
45
|
void Add(const DataType *p, size_t n) {
|
46
|
-
total_count_ +=
|
46
|
+
total_count_ += n;
|
47
47
|
n += 1;
|
48
48
|
while(--n) ++data_[*p++];
|
49
49
|
}
|
50
50
|
void AddHistogram(const Histogram& v) {
|
51
51
|
total_count_ += v.total_count_;
|
52
|
-
for (
|
52
|
+
for (size_t i = 0; i < kDataSize; ++i) {
|
53
53
|
data_[i] += v.data_[i];
|
54
54
|
}
|
55
55
|
}
|
56
56
|
|
57
|
-
|
58
|
-
|
57
|
+
uint32_t data_[kDataSize];
|
58
|
+
size_t total_count_;
|
59
59
|
double bit_cost_;
|
60
60
|
};
|
61
61
|
|
@@ -70,8 +70,8 @@ typedef Histogram<272> HistogramContextMap;
|
|
70
70
|
// Block type histogram, 256 block types + 2 special symbols.
|
71
71
|
typedef Histogram<258> HistogramBlockType;
|
72
72
|
|
73
|
-
static const
|
74
|
-
static const
|
73
|
+
static const size_t kLiteralContextBits = 6;
|
74
|
+
static const size_t kDistanceContextBits = 2;
|
75
75
|
|
76
76
|
void BuildHistograms(
|
77
77
|
const Command* cmds,
|
@@ -84,7 +84,7 @@ void BuildHistograms(
|
|
84
84
|
size_t mask,
|
85
85
|
uint8_t prev_byte,
|
86
86
|
uint8_t prev_byte2,
|
87
|
-
const std::vector<
|
87
|
+
const std::vector<ContextType>& context_modes,
|
88
88
|
std::vector<HistogramLiteral>* literal_histograms,
|
89
89
|
std::vector<HistogramCommand>* insert_and_copy_histograms,
|
90
90
|
std::vector<HistogramDistance>* copy_dist_histograms);
|
@@ -17,29 +17,29 @@
|
|
17
17
|
|
18
18
|
namespace brotli {
|
19
19
|
|
20
|
-
static
|
20
|
+
static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
|
21
21
|
if (c < 128) {
|
22
22
|
return 0; // Next one is the 'Byte 1' again.
|
23
|
-
} else if (c >= 192) {
|
24
|
-
return std::min(1, clamp);
|
23
|
+
} else if (c >= 192) { // Next one is the 'Byte 2' of utf-8 encoding.
|
24
|
+
return std::min<size_t>(1, clamp);
|
25
25
|
} else {
|
26
26
|
// Let's decide over the last byte if this ends the sequence.
|
27
27
|
if (last < 0xe0) {
|
28
28
|
return 0; // Completed two or three byte coding.
|
29
|
-
} else {
|
30
|
-
return std::min(2, clamp);
|
29
|
+
} else { // Next one is the 'Byte 3' of utf-8 encoding.
|
30
|
+
return std::min<size_t>(2, clamp);
|
31
31
|
}
|
32
32
|
}
|
33
33
|
}
|
34
34
|
|
35
|
-
static
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
35
|
+
static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
|
36
|
+
const uint8_t *data) {
|
37
|
+
size_t counts[3] = { 0 };
|
38
|
+
size_t max_utf8 = 1; // should be 2, but 1 compresses better.
|
39
|
+
size_t last_c = 0;
|
40
|
+
size_t utf8_pos = 0;
|
41
41
|
for (size_t i = 0; i < len; ++i) {
|
42
|
-
|
42
|
+
size_t c = data[(pos + i) & mask];
|
43
43
|
utf8_pos = UTF8Position(last_c, c, 2);
|
44
44
|
++counts[utf8_pos];
|
45
45
|
last_c = c;
|
@@ -53,22 +53,22 @@ static int DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
|
|
53
53
|
return max_utf8;
|
54
54
|
}
|
55
55
|
|
56
|
-
void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
57
|
-
|
56
|
+
static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
57
|
+
const uint8_t *data, float *cost) {
|
58
58
|
|
59
59
|
// max_utf8 is 0 (normal ascii single byte modeling),
|
60
60
|
// 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling).
|
61
|
-
const
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
61
|
+
const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
|
62
|
+
size_t histogram[3][256] = { { 0 } };
|
63
|
+
size_t window_half = 495;
|
64
|
+
size_t in_window = std::min(window_half, len);
|
65
|
+
size_t in_window_utf8[3] = { 0 };
|
66
66
|
|
67
67
|
// Bootstrap histograms.
|
68
|
-
|
69
|
-
|
70
|
-
for (
|
71
|
-
|
68
|
+
size_t last_c = 0;
|
69
|
+
size_t utf8_pos = 0;
|
70
|
+
for (size_t i = 0; i < in_window; ++i) {
|
71
|
+
size_t c = data[(pos + i) & mask];
|
72
72
|
++histogram[utf8_pos][c];
|
73
73
|
++in_window_utf8[utf8_pos];
|
74
74
|
utf8_pos = UTF8Position(last_c, c, max_utf8);
|
@@ -76,30 +76,30 @@ void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
|
76
76
|
}
|
77
77
|
|
78
78
|
// Compute bit costs with sliding window.
|
79
|
-
for (
|
80
|
-
if (i
|
79
|
+
for (size_t i = 0; i < len; ++i) {
|
80
|
+
if (i >= window_half) {
|
81
81
|
// Remove a byte in the past.
|
82
|
-
|
82
|
+
size_t c = i < window_half + 1 ?
|
83
83
|
0 : data[(pos + i - window_half - 1) & mask];
|
84
|
-
|
84
|
+
size_t last_c = i < window_half + 2 ?
|
85
85
|
0 : data[(pos + i - window_half - 2) & mask];
|
86
|
-
|
86
|
+
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
|
87
87
|
--histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
|
88
88
|
--in_window_utf8[utf8_pos2];
|
89
89
|
}
|
90
|
-
if (i + window_half <
|
90
|
+
if (i + window_half < len) {
|
91
91
|
// Add a byte in the future.
|
92
|
-
|
93
|
-
|
94
|
-
|
92
|
+
size_t c = data[(pos + i + window_half - 1) & mask];
|
93
|
+
size_t last_c = data[(pos + i + window_half - 2) & mask];
|
94
|
+
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
|
95
95
|
++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
|
96
96
|
++in_window_utf8[utf8_pos2];
|
97
97
|
}
|
98
|
-
|
99
|
-
|
100
|
-
|
98
|
+
size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
|
99
|
+
size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
|
100
|
+
size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
|
101
101
|
size_t masked_pos = (pos + i) & mask;
|
102
|
-
|
102
|
+
size_t histo = histogram[utf8_pos][data[masked_pos]];
|
103
103
|
if (histo == 0) {
|
104
104
|
histo = 1;
|
105
105
|
}
|
@@ -114,7 +114,7 @@ void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
|
114
114
|
// rapidly in the beginning of the file, perhaps because the beginning
|
115
115
|
// of the data is a statistical "anomaly".
|
116
116
|
if (i < 2000) {
|
117
|
-
lit_cost += 0.7 - ((2000 - i) / 2000.0 * 0.35);
|
117
|
+
lit_cost += 0.7 - (static_cast<double>(2000 - i) / 2000.0 * 0.35);
|
118
118
|
}
|
119
119
|
cost[i] = static_cast<float>(lit_cost);
|
120
120
|
}
|
@@ -126,28 +126,28 @@ void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
|
126
126
|
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
|
127
127
|
return;
|
128
128
|
}
|
129
|
-
|
130
|
-
|
131
|
-
|
129
|
+
size_t histogram[256] = { 0 };
|
130
|
+
size_t window_half = 2000;
|
131
|
+
size_t in_window = std::min(window_half, len);
|
132
132
|
|
133
133
|
// Bootstrap histogram.
|
134
|
-
for (
|
134
|
+
for (size_t i = 0; i < in_window; ++i) {
|
135
135
|
++histogram[data[(pos + i) & mask]];
|
136
136
|
}
|
137
137
|
|
138
138
|
// Compute bit costs with sliding window.
|
139
|
-
for (
|
140
|
-
if (i
|
139
|
+
for (size_t i = 0; i < len; ++i) {
|
140
|
+
if (i >= window_half) {
|
141
141
|
// Remove a byte in the past.
|
142
142
|
--histogram[data[(pos + i - window_half) & mask]];
|
143
143
|
--in_window;
|
144
144
|
}
|
145
|
-
if (i + window_half <
|
145
|
+
if (i + window_half < len) {
|
146
146
|
// Add a byte in the future.
|
147
147
|
++histogram[data[(pos + i + window_half) & mask]];
|
148
148
|
++in_window;
|
149
149
|
}
|
150
|
-
|
150
|
+
size_t histo = histogram[data[(pos + i) & mask]];
|
151
151
|
if (histo == 0) {
|
152
152
|
histo = 1;
|
153
153
|
}
|