brotli 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.travis.yml +11 -3
  4. data/Gemfile +2 -0
  5. data/ext/brotli/brotli.c +279 -0
  6. data/ext/brotli/brotli.h +2 -0
  7. data/ext/brotli/buffer.c +95 -0
  8. data/ext/brotli/buffer.h +19 -0
  9. data/ext/brotli/extconf.rb +21 -81
  10. data/lib/brotli/version.rb +1 -1
  11. data/vendor/brotli/dec/bit_reader.c +5 -5
  12. data/vendor/brotli/dec/bit_reader.h +15 -15
  13. data/vendor/brotli/dec/context.h +1 -1
  14. data/vendor/brotli/dec/decode.c +433 -348
  15. data/vendor/brotli/dec/decode.h +74 -48
  16. data/vendor/brotli/dec/huffman.c +5 -4
  17. data/vendor/brotli/dec/huffman.h +4 -4
  18. data/vendor/brotli/dec/port.h +2 -95
  19. data/vendor/brotli/dec/prefix.h +5 -3
  20. data/vendor/brotli/dec/state.c +15 -27
  21. data/vendor/brotli/dec/state.h +21 -17
  22. data/vendor/brotli/dec/transform.h +1 -1
  23. data/vendor/brotli/enc/backward_references.c +892 -0
  24. data/vendor/brotli/enc/backward_references.h +85 -102
  25. data/vendor/brotli/enc/backward_references_inc.h +147 -0
  26. data/vendor/brotli/enc/bit_cost.c +35 -0
  27. data/vendor/brotli/enc/bit_cost.h +23 -121
  28. data/vendor/brotli/enc/bit_cost_inc.h +127 -0
  29. data/vendor/brotli/enc/block_encoder_inc.h +33 -0
  30. data/vendor/brotli/enc/block_splitter.c +197 -0
  31. data/vendor/brotli/enc/block_splitter.h +40 -50
  32. data/vendor/brotli/enc/block_splitter_inc.h +432 -0
  33. data/vendor/brotli/enc/brotli_bit_stream.c +1334 -0
  34. data/vendor/brotli/enc/brotli_bit_stream.h +95 -167
  35. data/vendor/brotli/enc/cluster.c +56 -0
  36. data/vendor/brotli/enc/cluster.h +23 -305
  37. data/vendor/brotli/enc/cluster_inc.h +315 -0
  38. data/vendor/brotli/enc/command.h +83 -76
  39. data/vendor/brotli/enc/compress_fragment.c +747 -0
  40. data/vendor/brotli/enc/compress_fragment.h +48 -37
  41. data/vendor/brotli/enc/compress_fragment_two_pass.c +557 -0
  42. data/vendor/brotli/enc/compress_fragment_two_pass.h +37 -26
  43. data/vendor/brotli/enc/compressor.cc +139 -0
  44. data/vendor/brotli/enc/compressor.h +146 -0
  45. data/vendor/brotli/enc/context.h +102 -96
  46. data/vendor/brotli/enc/dictionary_hash.h +9 -5
  47. data/vendor/brotli/enc/encode.c +1562 -0
  48. data/vendor/brotli/enc/encode.h +211 -199
  49. data/vendor/brotli/enc/encode_parallel.cc +161 -151
  50. data/vendor/brotli/enc/encode_parallel.h +7 -8
  51. data/vendor/brotli/enc/entropy_encode.c +501 -0
  52. data/vendor/brotli/enc/entropy_encode.h +107 -89
  53. data/vendor/brotli/enc/entropy_encode_static.h +29 -62
  54. data/vendor/brotli/enc/fast_log.h +26 -20
  55. data/vendor/brotli/enc/find_match_length.h +23 -20
  56. data/vendor/brotli/enc/hash.h +614 -871
  57. data/vendor/brotli/enc/hash_forgetful_chain_inc.h +249 -0
  58. data/vendor/brotli/enc/hash_longest_match_inc.h +241 -0
  59. data/vendor/brotli/enc/hash_longest_match_quickly_inc.h +230 -0
  60. data/vendor/brotli/enc/histogram.c +95 -0
  61. data/vendor/brotli/enc/histogram.h +49 -83
  62. data/vendor/brotli/enc/histogram_inc.h +51 -0
  63. data/vendor/brotli/enc/literal_cost.c +178 -0
  64. data/vendor/brotli/enc/literal_cost.h +16 -10
  65. data/vendor/brotli/enc/memory.c +181 -0
  66. data/vendor/brotli/enc/memory.h +62 -0
  67. data/vendor/brotli/enc/metablock.c +515 -0
  68. data/vendor/brotli/enc/metablock.h +87 -57
  69. data/vendor/brotli/enc/metablock_inc.h +183 -0
  70. data/vendor/brotli/enc/port.h +73 -47
  71. data/vendor/brotli/enc/prefix.h +34 -61
  72. data/vendor/brotli/enc/quality.h +130 -0
  73. data/vendor/brotli/enc/ringbuffer.h +137 -122
  74. data/vendor/brotli/enc/{static_dict.cc → static_dict.c} +162 -139
  75. data/vendor/brotli/enc/static_dict.h +23 -18
  76. data/vendor/brotli/enc/static_dict_lut.h +11223 -12037
  77. data/vendor/brotli/enc/streams.cc +7 -7
  78. data/vendor/brotli/enc/streams.h +32 -32
  79. data/vendor/brotli/enc/{utf8_util.cc → utf8_util.c} +22 -20
  80. data/vendor/brotli/enc/utf8_util.h +16 -9
  81. data/vendor/brotli/enc/write_bits.h +49 -43
  82. metadata +34 -25
  83. data/ext/brotli/brotli.cc +0 -181
  84. data/vendor/brotli/dec/Makefile +0 -12
  85. data/vendor/brotli/dec/dictionary.c +0 -9466
  86. data/vendor/brotli/dec/dictionary.h +0 -38
  87. data/vendor/brotli/dec/types.h +0 -38
  88. data/vendor/brotli/enc/Makefile +0 -14
  89. data/vendor/brotli/enc/backward_references.cc +0 -858
  90. data/vendor/brotli/enc/block_splitter.cc +0 -505
  91. data/vendor/brotli/enc/brotli_bit_stream.cc +0 -1181
  92. data/vendor/brotli/enc/compress_fragment.cc +0 -701
  93. data/vendor/brotli/enc/compress_fragment_two_pass.cc +0 -524
  94. data/vendor/brotli/enc/dictionary.cc +0 -9466
  95. data/vendor/brotli/enc/dictionary.h +0 -41
  96. data/vendor/brotli/enc/encode.cc +0 -1180
  97. data/vendor/brotli/enc/entropy_encode.cc +0 -480
  98. data/vendor/brotli/enc/histogram.cc +0 -67
  99. data/vendor/brotli/enc/literal_cost.cc +0 -165
  100. data/vendor/brotli/enc/metablock.cc +0 -539
  101. data/vendor/brotli/enc/transform.h +0 -248
  102. data/vendor/brotli/enc/types.h +0 -29
@@ -0,0 +1,230 @@
1
+ /* NOLINT(build/header_guard) */
2
+ /* Copyright 2010 Google Inc. All Rights Reserved.
3
+
4
+ Distributed under MIT license.
5
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6
+ */
7
+
8
+ /* template parameters: FN, BUCKET_BITS, BUCKET_SWEEP, USE_DICTIONARY */
9
+
10
+ #define HashLongestMatchQuickly HASHER()
11
+
12
+ #define BUCKET_SIZE (1 << BUCKET_BITS)
13
+
14
+ #define HASH_MAP_SIZE (4 << BUCKET_BITS)
15
+
16
/* Returns the constant 8 for this hasher. FN(StitchToPreviousBlock) compares
   num_bytes against this value minus one. */
+ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
17
/* Returns the constant 8 for this hasher.
   NOTE(review): presumably the number of bytes that must be readable at a
   position before it may be hashed (FN(HashBytes) does a 64-bit load) --
   confirm against the callers. */
+ static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
18
+
19
+ /* HashBytes is the function that chooses the bucket to place
20
+ the address in. The HashLongestMatch and HashLongestMatchQuickly
21
+ classes have separate, different implementations of hashing.
   The returned key is the top BUCKET_BITS bits of the mixed 64-bit value,
   so it is always below BUCKET_SIZE (1 << BUCKET_BITS). */
22
+ static uint32_t FN(HashBytes)(const uint8_t *data) {
23
+ /* Computing a hash based on 5 bytes works much better for
24
+ qualities 1 and 3, where the next hash value is likely to replace
   NOTE(review): the sentence above is cut off in the original source.
   The << 24 discards the three most significant bytes of the 64-bit load,
   so only five of the loaded bytes influence the product. */
25
+ uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32;
26
+ /* The higher bits contain more mixture from the multiplication,
27
+ so we take our results from there. */
28
+ return (uint32_t)(h >> (64 - BUCKET_BITS));
29
+ }
30
+
31
+ /* A (forgetful) hash table to the data seen by the compressor, to
32
+ help create backward references to previous data.
33
+
34
+ This is a hash map of fixed size (BUCKET_SIZE). Starting from the
35
+ given index, BUCKET_SWEEP buckets are used to store values of a key. */
36
+ typedef struct HashLongestMatchQuickly {
/* Positions truncated to 32 bits, indexed by hash key plus a sweep offset.
   The BUCKET_SWEEP extra entries keep key + offset inside the array for
   the largest key (keys are below BUCKET_SIZE, offsets below BUCKET_SWEEP). */
37
+ uint32_t buckets_[BUCKET_SIZE + BUCKET_SWEEP];
38
+ /* True if buckets_ array needs to be initialized. */
39
+ BROTLI_BOOL is_dirty_;
/* Statistics object handed to SearchInStaticDictionary by
   FN(FindLongestMatch); reset in FN(Reset). */
40
+ DictionarySearchStatictics dict_search_stats_;
41
+ } HashLongestMatchQuickly;
42
+
43
/* Flags the bucket table as needing (re)initialization and resets the
   dictionary search statistics. The bucket table itself is not touched. */
+ static void FN(Reset)(HashLongestMatchQuickly* self) {
44
+ self->is_dirty_ = BROTLI_TRUE;
45
+ DictionarySearchStaticticsReset(&self->dict_search_stats_);
46
+ }
47
+
48
/* Zero-fills the entire bucket table when it is flagged dirty and then
   clears the flag. Does nothing when the table is already clean. */
+ static void FN(InitEmpty)(HashLongestMatchQuickly* self) {
49
+ if (self->is_dirty_) {
50
+ /* It is not strictly necessary to fill this buffer here, but
51
+ not filling will make the results of the compression stochastic
52
+ (but correct). This is because random data would cause the
53
+ system to find accidentally good backward references here and there. */
54
+ memset(&self->buckets_[0], 0, sizeof(self->buckets_));
55
+ self->is_dirty_ = BROTLI_FALSE;
56
+ }
57
+ }
58
+
59
/* Partial initialization: for each of the first |num| positions of |data|,
   zeroes only the BUCKET_SWEEP slots starting at that position's hash key.
   The dirty flag is cleared only when |num| is non-zero.
   NOTE(review): FN(HashBytes) performs a 64-bit load at every &data[i], so
   the caller presumably guarantees readable bytes past data[num - 1] --
   confirm. */
+ static void FN(InitForData)(HashLongestMatchQuickly* self, const uint8_t* data,
60
+ size_t num) {
61
+ size_t i;
62
+ for (i = 0; i < num; ++i) {
63
+ const uint32_t key = FN(HashBytes)(&data[i]);
64
+ memset(&self->buckets_[key], 0, BUCKET_SWEEP * sizeof(self->buckets_[0]));
65
+ }
66
+ if (num != 0) {
67
+ self->is_dirty_ = BROTLI_FALSE;
68
+ }
69
+ }
70
+
71
/* Prepares the hasher before use. When the input starts at position 0, is
   the last chunk, and holds at most HASH_MAP_SIZE >> 7 bytes, only the
   buckets those bytes hash to are cleared (FN(InitForData)); in every other
   case the whole table is cleared if dirty (FN(InitEmpty)).
   |m| and |params| are accepted but unused. */
+ static void FN(Init)(
72
+ MemoryManager* m, HashLongestMatchQuickly* self, const uint8_t* data,
73
+ const BrotliEncoderParams* params, size_t position, size_t bytes,
74
+ BROTLI_BOOL is_last) {
75
+ /* Choose which init method is faster.
76
+ Init() is about 100 times faster than InitForData().
   NOTE(review): "Init()" here presumably refers to the full clear done by
   FN(InitEmpty), measured per byte of table versus per byte of input. */
77
+ const size_t kMaxBytesForPartialHashInit = HASH_MAP_SIZE >> 7;
78
+ BROTLI_UNUSED(m);
79
+ BROTLI_UNUSED(params);
80
+ if (position == 0 && is_last && bytes <= kMaxBytesForPartialHashInit) {
81
+ FN(InitForData)(self, data, bytes);
82
+ } else {
83
+ FN(InitEmpty)(self);
84
+ }
85
+ }
86
+
87
+ /* Hash the bytes at &data[ix & mask] with FN(HashBytes) and record ix
88
+ (truncated to 32 bits) in one of the BUCKET_SWEEP slots that start at
89
+ that key: slot index is key + ((ix >> 3) % BUCKET_SWEEP). */
90
+ static BROTLI_INLINE void FN(Store)(HashLongestMatchQuickly* self,
91
+ const uint8_t *data, const size_t mask, const size_t ix) {
92
+ const uint32_t key = FN(HashBytes)(&data[ix & mask]);
93
+ /* Wiggle the value with the bucket sweep range. */
94
+ const uint32_t off = (ix >> 3) % BUCKET_SWEEP;
95
+ self->buckets_[key + off] = (uint32_t)ix;
96
+ }
97
+
98
/* Calls FN(Store) for every position in the half-open range
   [ix_start, ix_end). Does nothing when ix_start >= ix_end. */
+ static BROTLI_INLINE void FN(StoreRange)(HashLongestMatchQuickly* self,
99
+ const uint8_t *data, const size_t mask, const size_t ix_start,
100
+ const size_t ix_end) {
101
+ size_t i;
102
+ for (i = ix_start; i < ix_end; ++i) {
103
+ FN(Store)(self, data, mask, i);
104
+ }
105
+ }
106
+
107
/* Stores the three positions immediately before |position| into the hash
   table, provided the new block has at least FN(HashTypeLength)() - 1 bytes
   and at least three bytes precede |position|. Otherwise does nothing. */
+ static BROTLI_INLINE void FN(StitchToPreviousBlock)(
108
+ HashLongestMatchQuickly* self, size_t num_bytes, size_t position,
109
+ const uint8_t* ringbuffer, size_t ringbuffer_mask) {
110
+ if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
111
+ /* Prepare the hashes for three last bytes of the last write.
112
+ These could not be calculated before, since they require knowledge
113
+ of both the previous and the current block. */
114
+ FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
115
+ FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
116
+ FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
117
+ }
118
+ }
119
+
120
+ /* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
121
+ up to the length of max_length and stores the position cur_ix in the
122
+ hash table.
123
+
124
+ Does not look for matches longer than max_length.
125
+ Does not look for matches further away than max_backward.
126
+ Writes the best match into |out|.
127
+ Returns true if match is found, otherwise false.
   On entry out->len and out->score are read as the length and score to
   beat; out->len_x_code is always reset to 0. */
128
+ static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(
129
+ HashLongestMatchQuickly* self, const uint8_t* BROTLI_RESTRICT data,
130
+ const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
131
+ const size_t cur_ix, const size_t max_length, const size_t max_backward,
132
+ HasherSearchResult* BROTLI_RESTRICT out) {
133
+ const size_t best_len_in = out->len;
134
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
135
+ const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
/* Byte just past the current best length; a candidate that differs here
   is rejected without computing its full match length. */
136
+ int compare_char = data[cur_ix_masked + best_len_in];
137
+ score_t best_score = out->score;
138
+ size_t best_len = best_len_in;
139
+ size_t cached_backward = (size_t)distance_cache[0];
140
+ size_t prev_ix = cur_ix - cached_backward;
141
+ BROTLI_BOOL is_match_found = BROTLI_FALSE;
142
+ out->len_x_code = 0;
/* First try the most recent distance. The unsigned comparison rejects a
   cached distance of 0 and one greater than cur_ix (the subtraction wraps). */
143
+ if (prev_ix < cur_ix) {
144
+ prev_ix &= (uint32_t)ring_buffer_mask;
145
+ if (compare_char == data[prev_ix + best_len]) {
146
+ size_t len = FindMatchLengthWithLimit(&data[prev_ix],
147
+ &data[cur_ix_masked],
148
+ max_length);
149
+ if (len >= 4) {
150
+ best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
151
+ best_len = len;
152
+ out->len = len;
153
+ out->distance = cached_backward;
154
+ out->score = best_score;
155
+ compare_char = data[cur_ix_masked + best_len];
/* With a single slot per key, record cur_ix and return immediately. */
156
+ if (BUCKET_SWEEP == 1) {
157
+ self->buckets_[key] = (uint32_t)cur_ix;
158
+ return BROTLI_TRUE;
159
+ } else {
160
+ is_match_found = BROTLI_TRUE;
161
+ }
162
+ }
163
+ }
164
+ }
165
+ if (BUCKET_SWEEP == 1) {
166
+ size_t backward;
167
+ size_t len;
168
+ /* Only one to look for, don't bother to prepare for a loop. */
169
+ prev_ix = self->buckets_[key];
170
+ self->buckets_[key] = (uint32_t)cur_ix;
171
+ backward = cur_ix - prev_ix;
172
+ prev_ix &= (uint32_t)ring_buffer_mask;
/* NOTE: the two early returns below leave before the static dictionary
   search at the end of the function is reached. */
173
+ if (compare_char != data[prev_ix + best_len_in]) {
174
+ return BROTLI_FALSE;
175
+ }
176
+ if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
177
+ return BROTLI_FALSE;
178
+ }
179
+ len = FindMatchLengthWithLimit(&data[prev_ix],
180
+ &data[cur_ix_masked],
181
+ max_length);
182
+ if (len >= 4) {
183
+ out->len = len;
184
+ out->distance = backward;
185
+ out->score = BackwardReferenceScore(len, backward);
186
+ return BROTLI_TRUE;
187
+ }
188
+ } else {
/* Examine the BUCKET_SWEEP consecutive slots starting at key and keep the
   candidate with the highest score. The loop increment reads one slot
   past the sweep; that index is still inside buckets_ because the array
   has BUCKET_SWEEP extra entries. */
189
+ uint32_t *bucket = self->buckets_ + key;
190
+ int i;
191
+ prev_ix = *bucket++;
192
+ for (i = 0; i < BUCKET_SWEEP; ++i, prev_ix = *bucket++) {
193
+ const size_t backward = cur_ix - prev_ix;
194
+ size_t len;
195
+ prev_ix &= (uint32_t)ring_buffer_mask;
196
+ if (compare_char != data[prev_ix + best_len]) {
197
+ continue;
198
+ }
199
+ if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
200
+ continue;
201
+ }
202
+ len = FindMatchLengthWithLimit(&data[prev_ix],
203
+ &data[cur_ix_masked],
204
+ max_length);
205
+ if (len >= 4) {
206
+ const score_t score = BackwardReferenceScore(len, backward);
207
+ if (best_score < score) {
208
+ best_score = score;
209
+ best_len = len;
210
+ out->len = best_len;
211
+ out->distance = backward;
212
+ out->score = score;
213
+ compare_char = data[cur_ix_masked + best_len];
214
+ is_match_found = BROTLI_TRUE;
215
+ }
216
+ }
217
+ }
218
+ }
/* Consult the static dictionary only when nothing was found above. */
219
+ if (USE_DICTIONARY && !is_match_found) {
220
+ is_match_found = SearchInStaticDictionary(&self->dict_search_stats_,
221
+ &data[cur_ix_masked], max_length, max_backward, out, BROTLI_TRUE);
222
+ }
/* Record cur_ix in the slot chosen by the same rule FN(Store) uses. */
223
+ self->buckets_[key + ((cur_ix >> 3) % BUCKET_SWEEP)] = (uint32_t)cur_ix;
224
+ return is_match_found;
225
+ }
226
+
227
+ #undef HASH_MAP_SIZE
228
+ #undef BUCKET_SIZE
229
+
230
+ #undef HashLongestMatchQuickly
@@ -0,0 +1,95 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Build per-context histograms of literals, commands and distance codes. */
8
+
9
+ #include "./histogram.h"
10
+
11
+ #include "./block_splitter.h"
12
+ #include "./command.h"
13
+ #include "./context.h"
14
+
15
+ #if defined(__cplusplus) || defined(c_plusplus)
16
+ extern "C" {
17
+ #endif
18
+
19
/* Cursor over a BlockSplit that is advanced one element at a time by
   BlockSplitIteratorNext. */
+ typedef struct BlockSplitIterator {
20
+ const BlockSplit* split_; /* Not owned. */
21
+ size_t idx_; /* Index of the current block in split_->types / lengths. */
22
+ size_t type_; /* Type of the current block (0 for the first block). */
23
+ size_t length_; /* Elements left in the current block. */
24
+ } BlockSplitIterator;
25
+
26
/* Positions |self| at the first block of |split|: index 0, type 0, and the
   first block's length, or length 0 when split->lengths is NULL. */
+ static void InitBlockSplitIterator(BlockSplitIterator* self,
27
+ const BlockSplit* split) {
28
+ self->split_ = split;
29
+ self->idx_ = 0;
30
+ self->type_ = 0;
31
+ self->length_ = split->lengths ? split->lengths[0] : 0;
32
+ }
33
+
34
/* Consumes one element. When the current block is exhausted, first moves to
   the next block and loads its type and length from the split.
   NOTE(review): there is no bounds check on idx_ and no guard against a
   zero-length next block (length_ is unsigned and would wrap); the caller
   presumably never consumes more elements than the split describes --
   confirm. */
+ static void BlockSplitIteratorNext(BlockSplitIterator* self) {
35
+ if (self->length_ == 0) {
36
+ ++self->idx_;
37
+ self->type_ = self->split_->types[self->idx_];
38
+ self->length_ = self->split_->lengths[self->idx_];
39
+ }
40
+ --self->length_;
41
+ }
42
+
43
/* Walks |cmds| once and accumulates three families of histograms:
   - insert_and_copy_histograms, indexed by the command's block type;
   - literal_histograms, indexed by (block type << BROTLI_LITERAL_CONTEXT_BITS)
     plus the literal context of the two preceding bytes;
   - copy_dist_histograms, indexed by
     (block type << BROTLI_DISTANCE_CONTEXT_BITS) plus the distance context.
   Literals are read from ringbuffer[pos & mask], starting at |start_pos|.
   |prev_byte| and |prev_byte2| are the two bytes preceding |start_pos|.
   The histograms are only added to, never cleared here. */
+ void BrotliBuildHistogramsWithContext(
44
+ const Command* cmds, const size_t num_commands,
45
+ const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
46
+ const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t start_pos,
47
+ size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
48
+ const ContextType* context_modes, HistogramLiteral* literal_histograms,
49
+ HistogramCommand* insert_and_copy_histograms,
50
+ HistogramDistance* copy_dist_histograms) {
51
+ size_t pos = start_pos;
52
+ BlockSplitIterator literal_it;
53
+ BlockSplitIterator insert_and_copy_it;
54
+ BlockSplitIterator dist_it;
55
+ size_t i;
56
+
57
+ InitBlockSplitIterator(&literal_it, literal_split);
58
+ InitBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split);
59
+ InitBlockSplitIterator(&dist_it, dist_split);
60
+ for (i = 0; i < num_commands; ++i) {
61
+ const Command* cmd = &cmds[i];
62
+ size_t j;
63
+ BlockSplitIteratorNext(&insert_and_copy_it);
64
+ HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_],
65
+ cmd->cmd_prefix_);
/* One literal per inserted byte; each advances the literal iterator. */
66
+ for (j = cmd->insert_len_; j != 0; --j) {
67
+ size_t context;
68
+ BlockSplitIteratorNext(&literal_it);
69
+ context = (literal_it.type_ << BROTLI_LITERAL_CONTEXT_BITS) +
70
+ Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
71
+ HistogramAddLiteral(&literal_histograms[context],
72
+ ringbuffer[pos & mask]);
73
+ prev_byte2 = prev_byte;
74
+ prev_byte = ringbuffer[pos & mask];
75
+ ++pos;
76
+ }
/* Skip over the copied bytes; after a non-empty copy the context bytes are
   the last two bytes of the copy. */
77
+ pos += CommandCopyLen(cmd);
78
+ if (CommandCopyLen(cmd)) {
79
+ prev_byte2 = ringbuffer[(pos - 2) & mask];
80
+ prev_byte = ringbuffer[(pos - 1) & mask];
/* NOTE(review): commands with cmd_prefix_ below 128 contribute no distance
   symbol; presumably they reuse the last distance implicitly -- confirm. */
81
+ if (cmd->cmd_prefix_ >= 128) {
82
+ size_t context;
83
+ BlockSplitIteratorNext(&dist_it);
84
+ context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) +
85
+ CommandDistanceContext(cmd);
86
+ HistogramAddDistance(&copy_dist_histograms[context],
87
+ cmd->dist_prefix_);
88
+ }
89
+ }
90
+ }
91
+ }
92
+
93
+ #if defined(__cplusplus) || defined(c_plusplus)
94
+ } /* extern "C" */
95
+ #endif
@@ -4,91 +4,57 @@
4
4
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
5
  */
6
6
 
7
- // Models the histograms of literals, commands and distance codes.
7
+ /* Models the histograms of literals, commands and distance codes. */
8
8
 
9
9
  #ifndef BROTLI_ENC_HISTOGRAM_H_
10
10
  #define BROTLI_ENC_HISTOGRAM_H_
11
11
 
12
- #include <cstring>
13
- #include <limits>
14
- #include <vector>
15
- #include "./context.h"
16
- #include "./command.h"
17
- #include "./fast_log.h"
18
- #include "./prefix.h"
19
- #include "./types.h"
20
-
21
- namespace brotli {
22
-
23
- struct BlockSplit;
24
-
25
- // A simple container for histograms of data in blocks.
26
- template<int kDataSize>
27
- struct Histogram {
28
- Histogram(void) {
29
- Clear();
30
- }
31
- void Clear(void) {
32
- memset(data_, 0, sizeof(data_));
33
- total_count_ = 0;
34
- bit_cost_ = std::numeric_limits<double>::infinity();
35
- }
36
- void Add(size_t val) {
37
- ++data_[val];
38
- ++total_count_;
39
- }
40
- void Remove(size_t val) {
41
- --data_[val];
42
- --total_count_;
43
- }
44
- template<typename DataType>
45
- void Add(const DataType *p, size_t n) {
46
- total_count_ += n;
47
- n += 1;
48
- while(--n) ++data_[*p++];
49
- }
50
- void AddHistogram(const Histogram& v) {
51
- total_count_ += v.total_count_;
52
- for (size_t i = 0; i < kDataSize; ++i) {
53
- data_[i] += v.data_[i];
54
- }
55
- }
56
-
57
- uint32_t data_[kDataSize];
58
- size_t total_count_;
59
- double bit_cost_;
60
- };
12
+ #include <string.h> /* memset */
61
13
 
62
- // Literal histogram.
63
- typedef Histogram<256> HistogramLiteral;
64
- // Prefix histograms.
65
- typedef Histogram<kNumCommandPrefixes> HistogramCommand;
66
- typedef Histogram<kNumDistancePrefixes> HistogramDistance;
67
- typedef Histogram<kNumBlockLenPrefixes> HistogramBlockLength;
68
- // Context map histogram, 256 Huffman tree indexes + 16 run length codes.
69
- typedef Histogram<272> HistogramContextMap;
70
- // Block type histogram, 256 block types + 2 special symbols.
71
- typedef Histogram<258> HistogramBlockType;
72
-
73
- static const size_t kLiteralContextBits = 6;
74
- static const size_t kDistanceContextBits = 2;
75
-
76
- void BuildHistograms(
77
- const Command* cmds,
78
- const size_t num_commands,
79
- const BlockSplit& literal_split,
80
- const BlockSplit& insert_and_copy_split,
81
- const BlockSplit& dist_split,
82
- const uint8_t* ringbuffer,
83
- size_t pos,
84
- size_t mask,
85
- uint8_t prev_byte,
86
- uint8_t prev_byte2,
87
- const std::vector<ContextType>& context_modes,
88
- std::vector<HistogramLiteral>* literal_histograms,
89
- std::vector<HistogramCommand>* insert_and_copy_histograms,
90
- std::vector<HistogramDistance>* copy_dist_histograms);
91
-
92
- } // namespace brotli
93
-
94
- #endif // BROTLI_ENC_HISTOGRAM_H_
14
+ #include "../common/constants.h"
15
+ #include "../common/types.h"
16
+ #include "./block_splitter.h"
17
+ #include "./command.h"
18
+ #include "./context.h"
19
+ #include "./port.h"
20
+
21
+ #if defined(__cplusplus) || defined(c_plusplus)
22
+ extern "C" {
23
+ #endif
24
+
25
+ #define FN(X) X ## Literal
26
+ #define DATA_SIZE BROTLI_NUM_LITERAL_SYMBOLS
27
+ #define DataType uint8_t
28
+ #include "./histogram_inc.h" /* NOLINT(build/include) */
29
+ #undef DataType
30
+ #undef DATA_SIZE
31
+ #undef FN
32
+
33
+ #define FN(X) X ## Command
34
+ #define DataType uint16_t
35
+ #define DATA_SIZE BROTLI_NUM_COMMAND_SYMBOLS
36
+ #include "./histogram_inc.h" /* NOLINT(build/include) */
37
+ #undef DATA_SIZE
38
+ #undef FN
39
+
40
+ #define FN(X) X ## Distance
41
+ #define DATA_SIZE BROTLI_NUM_DISTANCE_SYMBOLS
42
+ #include "./histogram_inc.h" /* NOLINT(build/include) */
43
+ #undef DataType
44
+ #undef DATA_SIZE
45
+ #undef FN
46
+
47
+ BROTLI_INTERNAL void BrotliBuildHistogramsWithContext(
48
+ const Command* cmds, const size_t num_commands,
49
+ const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
50
+ const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t pos,
51
+ size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
52
+ const ContextType* context_modes, HistogramLiteral* literal_histograms,
53
+ HistogramCommand* insert_and_copy_histograms,
54
+ HistogramDistance* copy_dist_histograms);
55
+
56
+ #if defined(__cplusplus) || defined(c_plusplus)
57
+ } /* extern "C" */
58
+ #endif
59
+
60
+ #endif /* BROTLI_ENC_HISTOGRAM_H_ */
@@ -0,0 +1,51 @@
1
+ /* NOLINT(build/header_guard) */
2
+ /* Copyright 2013 Google Inc. All Rights Reserved.
3
+
4
+ Distributed under MIT license.
5
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6
+ */
7
+
8
+ /* template parameters: FN, DATA_SIZE, DataType */
9
+
10
+ /* A simple container for histograms of data in blocks. */
11
+
12
+ typedef struct FN(Histogram) {
13
+ uint32_t data_[DATA_SIZE]; /* Occurrence count per symbol value. */
14
+ size_t total_count_; /* Number of symbols added so far. */
15
+ double bit_cost_; /* Set to HUGE_VAL by FN(HistogramClear); not updated by the add helpers. */
16
+ } FN(Histogram);
17
+
18
/* Resets |self| to the empty state: all counts zero, total zero, and
   bit_cost_ set to HUGE_VAL. */
+ static BROTLI_INLINE void FN(HistogramClear)(FN(Histogram)* self) {
19
+ memset(self->data_, 0, sizeof(self->data_));
20
+ self->total_count_ = 0;
21
+ self->bit_cost_ = HUGE_VAL;
22
+ }
23
+
24
/* Applies FN(HistogramClear) to each of the |length| histograms in |array|. */
+ static BROTLI_INLINE void FN(ClearHistograms)(
25
+ FN(Histogram)* array, size_t length) {
26
+ size_t i;
27
+ for (i = 0; i < length; ++i) FN(HistogramClear)(array + i);
28
+ }
29
+
30
/* Counts one occurrence of symbol |val|. No range check is performed, so
   |val| must be below DATA_SIZE. */
+ static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {
31
+ ++self->data_[val];
32
+ ++self->total_count_;
33
+ }
34
+
35
/* Counts the |n| symbols stored at |p|. Each symbol value is used as an
   index without a range check. */
+ static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self,
36
+ const DataType *p, size_t n) {
37
+ self->total_count_ += n;
/* Pre-incrementing n lets the pre-decrement loop below run exactly n times,
   including zero times when n was 0. */
38
+ n += 1;
39
+ while (--n) ++self->data_[*p++];
40
+ }
41
+
42
/* Adds every count and the total of |v| into |self|. bit_cost_ of |self|
   is left unchanged. */
+ static BROTLI_INLINE void FN(HistogramAddHistogram)(FN(Histogram)* self,
43
+ const FN(Histogram)* v) {
44
+ size_t i;
45
+ self->total_count_ += v->total_count_;
46
+ for (i = 0; i < DATA_SIZE; ++i) {
47
+ self->data_[i] += v->data_[i];
48
+ }
49
+ }
50
+
51
/* Returns DATA_SIZE, the number of entries in data_ for this instantiation. */
+ static BROTLI_INLINE size_t FN(HistogramDataSize)(void) { return DATA_SIZE; }
@@ -0,0 +1,178 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Literal cost model to allow backward reference replacement to be efficient.
8
+ */
9
+
10
+ #include "./literal_cost.h"
11
+
12
+ #include "../common/types.h"
13
+ #include "./fast_log.h"
14
+ #include "./port.h"
15
+ #include "./utf8_util.h"
16
+
17
+ #if defined(__cplusplus) || defined(c_plusplus)
18
+ extern "C" {
19
+ #endif
20
+
21
/* Classifies which histogram slot (0, 1 or 2) the byte FOLLOWING |c| should
   use, given |c| and the byte |last| that preceded it:
   - c < 128: 0;
   - c >= 192: min(1, clamp);
   - otherwise (128..191): 0 when last < 0xe0, else min(2, clamp).
   |clamp| caps the returned value. */
+ static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
22
+ if (c < 128) {
23
+ return 0; /* Next one is the 'Byte 1' again. */
24
+ } else if (c >= 192) { /* Next one is the 'Byte 2' of utf-8 encoding. */
25
+ return BROTLI_MIN(size_t, 1, clamp);
26
+ } else {
27
+ /* Let's decide over the last byte if this ends the sequence. */
28
+ if (last < 0xe0) {
29
+ return 0; /* Completed two or three byte coding. */
30
+ } else { /* Next one is the 'Byte 3' of utf-8 encoding. */
31
+ return BROTLI_MIN(size_t, 2, clamp);
32
+ }
33
+ }
34
+ }
35
+
36
/* Scans |len| bytes of the ring buffer starting at |pos| and counts how
   often each UTF8Position class (clamped to 2) occurs.
   Returns 0 when classes 1 and 2 together occur fewer than 25 times, and 1
   otherwise. As written the function never returns 2: max_utf8 starts at 1
   and the counts[2] check can only assign 1 again. */
+ static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
37
+ const uint8_t *data) {
38
+ size_t counts[3] = { 0 };
39
+ size_t max_utf8 = 1; /* should be 2, but 1 compresses better. */
40
+ size_t last_c = 0;
41
+ size_t utf8_pos = 0;
42
+ size_t i;
43
+ for (i = 0; i < len; ++i) {
44
+ size_t c = data[(pos + i) & mask];
45
+ utf8_pos = UTF8Position(last_c, c, 2);
46
+ ++counts[utf8_pos];
47
+ last_c = c;
48
+ }
49
+ if (counts[2] < 500) {
50
+ max_utf8 = 1;
51
+ }
52
+ if (counts[1] + counts[2] < 25) {
53
+ max_utf8 = 0;
54
+ }
55
+ return max_utf8;
56
+ }
57
+
58
/* Writes an estimated bit cost into cost[i] for each of the |len| bytes at
   data[(pos + i) & mask]. Up to three histograms are kept, selected by the
   UTF8Position class of the preceding bytes, over a sliding window that
   extends window_half (495) bytes to either side of the current byte.
   The cost is log2(bytes in that class's window) - log2(count of this byte
   value), plus the adjustments applied below. */
+ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
59
+ const uint8_t *data, float *cost) {
60
+ /* max_utf8 is 0 (normal ascii single byte modeling),
61
+ 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling). */
62
+ const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
63
+ size_t histogram[3][256] = { { 0 } };
64
+ size_t window_half = 495;
65
+ size_t in_window = BROTLI_MIN(size_t, window_half, len);
66
+ size_t in_window_utf8[3] = { 0 };
67
+
68
+
69
+ size_t i;
70
+ { /* Bootstrap histograms. */
71
+ size_t last_c = 0;
72
+ size_t utf8_pos = 0;
73
+ for (i = 0; i < in_window; ++i) {
74
+ size_t c = data[(pos + i) & mask];
75
+ ++histogram[utf8_pos][c];
76
+ ++in_window_utf8[utf8_pos];
77
+ utf8_pos = UTF8Position(last_c, c, max_utf8);
78
+ last_c = c;
79
+ }
80
+ }
81
+
82
+ /* Compute bit costs with sliding window. */
83
+ for (i = 0; i < len; ++i) {
84
+ if (i >= window_half) {
85
+ /* Remove a byte in the past. The class of the departing byte is
   recomputed from the two bytes before it; bytes before the start of
   the range are treated as 0. */
86
+ size_t c =
87
+ i < window_half + 1 ? 0 : data[(pos + i - window_half - 1) & mask];
88
+ size_t last_c =
89
+ i < window_half + 2 ? 0 : data[(pos + i - window_half - 2) & mask];
90
+ size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
91
+ --histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
92
+ --in_window_utf8[utf8_pos2];
93
+ }
94
+ if (i + window_half < len) {
95
+ /* Add a byte in the future. */
96
+ size_t c = data[(pos + i + window_half - 1) & mask];
97
+ size_t last_c = data[(pos + i + window_half - 2) & mask];
98
+ size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
99
+ ++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
100
+ ++in_window_utf8[utf8_pos2];
101
+ }
102
+ {
103
+ size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
104
+ size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
105
+ size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
106
+ size_t masked_pos = (pos + i) & mask;
107
+ size_t histo = histogram[utf8_pos][data[masked_pos]];
108
+ double lit_cost;
/* Treat a zero count as one so that the logarithm below stays finite. */
109
+ if (histo == 0) {
110
+ histo = 1;
111
+ }
112
+ lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
113
+ lit_cost += 0.02905;
/* Costs under one bit are mapped to 0.5 + cost / 2, which keeps every
   estimate at or above half a bit. */
114
+ if (lit_cost < 1.0) {
115
+ lit_cost *= 0.5;
116
+ lit_cost += 0.5;
117
+ }
118
+ /* Make the first bytes more expensive -- seems to help, not sure why.
119
+ Perhaps because the entropy source is changing its properties
120
+ rapidly in the beginning of the file, perhaps because the beginning
121
+ of the data is a statistical "anomaly". */
122
+ if (i < 2000) {
123
+ lit_cost += 0.7 - ((double)(2000 - i) / 2000.0 * 0.35);
124
+ }
125
+ cost[i] = (float)lit_cost;
126
+ }
127
+ }
128
+ }
129
+
130
/* Writes an estimated bit cost into cost[i] for each of the |len| bytes at
   data[(pos + i) & mask]. When BrotliIsMostlyUTF8 accepts the input, the
   work is delegated to EstimateBitCostsForLiteralsUTF8. Otherwise a single
   256-entry histogram over a sliding window of 2000 bytes to either side is
   used, with cost = log2(bytes in window) - log2(count of this byte value)
   plus the adjustments applied below. */
+ void BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
131
+ const uint8_t *data, float *cost) {
132
+ if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
133
+ EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
134
+ return;
135
+ } else {
136
+ size_t histogram[256] = { 0 };
137
+ size_t window_half = 2000;
138
+ size_t in_window = BROTLI_MIN(size_t, window_half, len);
139
+
140
+ /* Bootstrap histogram. */
141
+ size_t i;
142
+ for (i = 0; i < in_window; ++i) {
143
+ ++histogram[data[(pos + i) & mask]];
144
+ }
145
+
146
+ /* Compute bit costs with sliding window. */
147
+ for (i = 0; i < len; ++i) {
148
+ size_t histo;
149
+ if (i >= window_half) {
150
+ /* Remove a byte in the past. */
151
+ --histogram[data[(pos + i - window_half) & mask]];
152
+ --in_window;
153
+ }
154
+ if (i + window_half < len) {
155
+ /* Add a byte in the future. */
156
+ ++histogram[data[(pos + i + window_half) & mask]];
157
+ ++in_window;
158
+ }
159
+ histo = histogram[data[(pos + i) & mask]];
/* Treat a zero count as one so that the logarithm below stays finite. */
160
+ if (histo == 0) {
161
+ histo = 1;
162
+ }
163
+ {
164
+ double lit_cost = FastLog2(in_window) - FastLog2(histo);
165
+ lit_cost += 0.029;
/* Costs under one bit are mapped to 0.5 + cost / 2. */
166
+ if (lit_cost < 1.0) {
167
+ lit_cost *= 0.5;
168
+ lit_cost += 0.5;
169
+ }
170
+ cost[i] = (float)lit_cost;
171
+ }
172
+ }
173
+ }
174
+ }
175
+
176
+ #if defined(__cplusplus) || defined(c_plusplus)
177
+ } /* extern "C" */
178
+ #endif