brotli 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/brotli/brotli.cc +114 -24
- data/ext/brotli/brotli.h +0 -1
- data/ext/brotli/extconf.rb +30 -23
- data/lib/brotli/version.rb +1 -1
- data/vendor/brotli/LICENSE +1 -1
- data/vendor/brotli/dec/Makefile +1 -1
- data/vendor/brotli/dec/bit_reader.c +3 -3
- data/vendor/brotli/dec/bit_reader.h +25 -27
- data/vendor/brotli/dec/context.h +4 -4
- data/vendor/brotli/dec/decode.c +410 -486
- data/vendor/brotli/dec/decode.h +101 -105
- data/vendor/brotli/dec/dictionary.c +1 -1
- data/vendor/brotli/dec/dictionary.h +7 -8
- data/vendor/brotli/dec/huffman.c +103 -105
- data/vendor/brotli/dec/huffman.h +18 -18
- data/vendor/brotli/dec/port.h +52 -40
- data/vendor/brotli/dec/prefix.h +2 -0
- data/vendor/brotli/dec/state.c +13 -19
- data/vendor/brotli/dec/state.h +25 -39
- data/vendor/brotli/dec/transform.h +38 -44
- data/vendor/brotli/dec/types.h +2 -2
- data/vendor/brotli/enc/Makefile +1 -1
- data/vendor/brotli/enc/backward_references.cc +455 -359
- data/vendor/brotli/enc/backward_references.h +79 -3
- data/vendor/brotli/enc/bit_cost.h +54 -32
- data/vendor/brotli/enc/block_splitter.cc +285 -193
- data/vendor/brotli/enc/block_splitter.h +4 -12
- data/vendor/brotli/enc/brotli_bit_stream.cc +623 -324
- data/vendor/brotli/enc/brotli_bit_stream.h +76 -37
- data/vendor/brotli/enc/cluster.h +161 -120
- data/vendor/brotli/enc/command.h +60 -37
- data/vendor/brotli/enc/compress_fragment.cc +701 -0
- data/vendor/brotli/enc/compress_fragment.h +47 -0
- data/vendor/brotli/enc/compress_fragment_two_pass.cc +524 -0
- data/vendor/brotli/enc/compress_fragment_two_pass.h +40 -0
- data/vendor/brotli/enc/compressor.h +15 -0
- data/vendor/brotli/enc/context.h +1 -1
- data/vendor/brotli/enc/dictionary.h +2 -2
- data/vendor/brotli/enc/encode.cc +819 -286
- data/vendor/brotli/enc/encode.h +38 -15
- data/vendor/brotli/enc/encode_parallel.cc +40 -42
- data/vendor/brotli/enc/entropy_encode.cc +144 -147
- data/vendor/brotli/enc/entropy_encode.h +32 -8
- data/vendor/brotli/enc/entropy_encode_static.h +572 -0
- data/vendor/brotli/enc/fast_log.h +7 -40
- data/vendor/brotli/enc/find_match_length.h +9 -9
- data/vendor/brotli/enc/hash.h +462 -154
- data/vendor/brotli/enc/histogram.cc +6 -6
- data/vendor/brotli/enc/histogram.h +13 -13
- data/vendor/brotli/enc/literal_cost.cc +45 -45
- data/vendor/brotli/enc/metablock.cc +92 -89
- data/vendor/brotli/enc/metablock.h +12 -12
- data/vendor/brotli/enc/port.h +7 -16
- data/vendor/brotli/enc/prefix.h +23 -22
- data/vendor/brotli/enc/ringbuffer.h +75 -29
- data/vendor/brotli/enc/static_dict.cc +56 -48
- data/vendor/brotli/enc/static_dict.h +5 -5
- data/vendor/brotli/enc/streams.cc +1 -1
- data/vendor/brotli/enc/streams.h +5 -5
- data/vendor/brotli/enc/transform.h +40 -35
- data/vendor/brotli/enc/types.h +2 -0
- data/vendor/brotli/enc/utf8_util.cc +3 -2
- data/vendor/brotli/enc/write_bits.h +6 -6
- metadata +9 -5
- data/vendor/brotli/dec/streams.c +0 -102
- data/vendor/brotli/dec/streams.h +0 -95
data/vendor/brotli/enc/hash.h
CHANGED
@@ -10,12 +10,10 @@
 #ifndef BROTLI_ENC_HASH_H_
 #define BROTLI_ENC_HASH_H_

-#include <string.h>
 #include <sys/types.h>
 #include <algorithm>
-#include <
-#include <
-#include <string>
+#include <cstring>
+#include <limits>

 #include "./dictionary_hash.h"
 #include "./fast_log.h"
@@ -28,15 +26,20 @@

 namespace brotli {

-static const
+static const size_t kMaxTreeSearchDepth = 64;
+static const size_t kMaxTreeCompLength = 128;
+
+static const uint32_t kDistanceCacheIndex[] = {
   0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
 };
 static const int kDistanceCacheOffset[] = {
   0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
 };

-static const
-static const
+static const uint32_t kCutoffTransformsCount = 10;
+static const uint8_t kCutoffTransforms[] = {
+  0, 12, 27, 23, 42, 63, 56, 48, 59, 64
+};

 // kHashMul32 multiplier has these properties:
 // * The multiplier must be odd. Otherwise we may lose the highest bit.
@@ -68,41 +71,47 @@ inline uint32_t Hash(const uint8_t *data) {
 // This function is used to sometimes discard a longer backward reference
 // when it is not much longer and the bit cost for encoding it is more
 // than the saved literals.
-
-
-
+//
+// backward_reference_offset MUST be positive.
+inline double BackwardReferenceScore(size_t copy_length,
+                                     size_t backward_reference_offset) {
+  return 5.4 * static_cast<double>(copy_length) -
+      1.20 * Log2FloorNonZero(backward_reference_offset);
 }

-inline double BackwardReferenceScoreUsingLastDistance(
-
+inline double BackwardReferenceScoreUsingLastDistance(size_t copy_length,
+                                                      size_t distance_short_code) {
   static const double kDistanceShortCodeBitCost[16] = {
     -0.6, 0.95, 1.17, 1.27,
     0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
     1.05, 1.05, 1.15, 1.15, 1.25, 1.25
   };
-  return 5.4 * copy_length -
+  return 5.4 * static_cast<double>(copy_length) -
+      kDistanceShortCodeBitCost[distance_short_code];
 }

 struct BackwardMatch {
-  BackwardMatch() : distance(0), length_and_code(0) {}
+  BackwardMatch(void) : distance(0), length_and_code(0) {}

-  BackwardMatch(
-      : distance(dist)
+  BackwardMatch(size_t dist, size_t len)
+      : distance(static_cast<uint32_t>(dist))
+      , length_and_code(static_cast<uint32_t>(len << 5)) {}

-  BackwardMatch(
-      : distance(dist)
-
+  BackwardMatch(size_t dist, size_t len, size_t len_code)
+      : distance(static_cast<uint32_t>(dist))
+      , length_and_code(static_cast<uint32_t>(
+            (len << 5) | (len == len_code ? 0 : len_code))) {}

-
+  size_t length(void) const {
     return length_and_code >> 5;
   }
-
-
+  size_t length_code(void) const {
+    size_t code = length_and_code & 31;
     return code ? code : length();
   }

-
-
+  uint32_t distance;
+  uint32_t length_and_code;
 };

 // A (forgetful) hash table to the data seen by the compressor, to
@@ -113,18 +122,31 @@ struct BackwardMatch {
 template <int kBucketBits, int kBucketSweep, bool kUseDictionary>
 class HashLongestMatchQuickly {
  public:
-  HashLongestMatchQuickly() {
+  HashLongestMatchQuickly(void) {
     Reset();
   }
-  void Reset() {
-
-    // not filling will make the results of the compression stochastic
-    // (but correct). This is because random data would cause the
-    // system to find accidentally good backward references here and there.
-    memset(&buckets_[0], 0, sizeof(buckets_));
+  void Reset(void) {
+    need_init_ = true;
     num_dict_lookups_ = 0;
     num_dict_matches_ = 0;
   }
+  void Init(void) {
+    if (need_init_) {
+      // It is not strictly necessary to fill this buffer here, but
+      // not filling will make the results of the compression stochastic
+      // (but correct). This is because random data would cause the
+      // system to find accidentally good backward references here and there.
+      memset(&buckets_[0], 0, sizeof(buckets_));
+      need_init_ = false;
+    }
+  }
+  void InitForData(const uint8_t* data, size_t num) {
+    for (size_t i = 0; i < num; ++i) {
+      const uint32_t key = HashBytes(&data[i]);
+      memset(&buckets_[key], 0, kBucketSweep * sizeof(buckets_[0]));
+      need_init_ = false;
+    }
+  }
   // Look at 4 bytes at data.
   // Compute a hash from these, and store the value somewhere within
   // [ix .. ix+3].
@@ -136,7 +158,8 @@ class HashLongestMatchQuickly {
   }

   // Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
-  // up to the length of max_length
+  // up to the length of max_length and stores the position cur_ix in the
+  // hash table.
   //
   // Does not look for matches longer than max_length.
   // Does not look for matches further away than max_backward.
@@ -146,27 +169,28 @@ class HashLongestMatchQuickly {
   inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer,
                                const size_t ring_buffer_mask,
                                const int* __restrict distance_cache,
-                               const
-                               const
-                               const
-
-
-
+                               const size_t cur_ix,
+                               const size_t max_length,
+                               const size_t max_backward,
+                               size_t * __restrict best_len_out,
+                               size_t * __restrict best_len_code_out,
+                               size_t * __restrict best_distance_out,
                                double* __restrict best_score_out) {
-    const
+    const size_t best_len_in = *best_len_out;
     const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+    const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]);
     int compare_char = ring_buffer[cur_ix_masked + best_len_in];
     double best_score = *best_score_out;
-
-
-
+    size_t best_len = best_len_in;
+    size_t cached_backward = static_cast<size_t>(distance_cache[0]);
+    size_t prev_ix = cur_ix - cached_backward;
     bool match_found = false;
     if (prev_ix < cur_ix) {
       prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
       if (compare_char == ring_buffer[prev_ix + best_len]) {
-
-
-
+        size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
+                                              &ring_buffer[cur_ix_masked],
+                                              max_length);
         if (len >= 4) {
           best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
           best_len = len;
@@ -176,6 +200,7 @@ class HashLongestMatchQuickly {
           *best_score_out = best_score;
           compare_char = ring_buffer[cur_ix_masked + best_len];
           if (kBucketSweep == 1) {
+            buckets_[key] = static_cast<uint32_t>(cur_ix);
             return true;
           } else {
             match_found = true;
@@ -183,11 +208,11 @@ class HashLongestMatchQuickly {
         }
       }
     }
-    const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]);
     if (kBucketSweep == 1) {
       // Only one to look for, don't bother to prepare for a loop.
       prev_ix = buckets_[key];
-
+      buckets_[key] = static_cast<uint32_t>(cur_ix);
+      size_t backward = cur_ix - prev_ix;
       prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
       if (compare_char != ring_buffer[prev_ix + best_len_in]) {
         return false;
@@ -195,9 +220,9 @@ class HashLongestMatchQuickly {
       if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
         return false;
       }
-      const
-
-
+      const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
+                                                  &ring_buffer[cur_ix_masked],
+                                                  max_length);
       if (len >= 4) {
         *best_len_out = len;
         *best_len_code_out = len;
@@ -209,7 +234,7 @@ class HashLongestMatchQuickly {
       uint32_t *bucket = buckets_ + key;
       prev_ix = *bucket++;
       for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {
-        const
+        const size_t backward = cur_ix - prev_ix;
         prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
         if (compare_char != ring_buffer[prev_ix + best_len]) {
           continue;
@@ -217,10 +242,9 @@ class HashLongestMatchQuickly {
        if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
          continue;
        }
-        const
-
-
-                                 max_length);
+        const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
+                                                    &ring_buffer[cur_ix_masked],
+                                                    max_length);
        if (len >= 4) {
          const double score = BackwardReferenceScore(len, backward);
          if (best_score < score) {
@@ -242,19 +266,20 @@ class HashLongestMatchQuickly {
       const uint32_t dict_key = Hash<14>(&ring_buffer[cur_ix_masked]) << 1;
       const uint16_t v = kStaticDictionaryHash[dict_key];
       if (v > 0) {
-        const
-        const
-        const
+        const uint32_t len = v & 31;
+        const uint32_t dist = v >> 5;
+        const size_t offset =
+            kBrotliDictionaryOffsetsByLength[len] + len * dist;
         if (len <= max_length) {
-          const
+          const size_t matchlen =
               FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
                                        &kBrotliDictionary[offset], len);
-          if (matchlen > len
-            const
-            const
-                transform_id * (
+          if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
+            const size_t transform_id = kCutoffTransforms[len - matchlen];
+            const size_t word_id =
+                transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]) +
                 dist;
-            const
+            const size_t backward = max_backward + word_id + 1;
             const double score = BackwardReferenceScore(matchlen, backward);
             if (best_score < score) {
               ++num_dict_matches_;
@@ -264,12 +289,14 @@ class HashLongestMatchQuickly {
               *best_len_code_out = len;
               *best_distance_out = backward;
               *best_score_out = best_score;
-
+              match_found = true;
             }
           }
         }
       }
     }
+    const uint32_t off = (cur_ix >> 3) % kBucketSweep;
+    buckets_[key + off] = static_cast<uint32_t>(cur_ix);
     return match_found;
   }

@@ -287,16 +314,17 @@ class HashLongestMatchQuickly {
     return static_cast<uint32_t>(h >> (64 - kBucketBits));
   }

+  enum { kHashMapSize = 4 << kBucketBits };
+
  private:
   static const uint32_t kBucketSize = 1 << kBucketBits;
   uint32_t buckets_[kBucketSize + kBucketSweep];
+  // True if buckets_ array needs to be initialized.
+  bool need_init_;
   size_t num_dict_lookups_;
   size_t num_dict_matches_;
 };

-// The maximum length for which the zopflification uses distinct distances.
-static const int kMaxZopfliLen = 325;
-
 // A (forgetful) hash table to the data seen by the compressor, to
 // help create backward references to previous data.
 //
@@ -308,16 +336,31 @@ template <int kBucketBits,
           int kNumLastDistancesToCheck>
 class HashLongestMatch {
  public:
-  HashLongestMatch() {
+  HashLongestMatch(void) {
     Reset();
   }

-  void Reset() {
-
+  void Reset(void) {
+    need_init_ = true;
     num_dict_lookups_ = 0;
     num_dict_matches_ = 0;
   }

+  void Init(void) {
+    if (need_init_) {
+      memset(&num_[0], 0, sizeof(num_));
+      need_init_ = false;
+    }
+  }
+
+  void InitForData(const uint8_t* data, size_t num) {
+    for (size_t i = 0; i < num; ++i) {
+      const uint32_t key = HashBytes(&data[i]);
+      num_[key] = 0;
+      need_init_ = false;
+    }
+  }
+
   // Look at 3 bytes at data.
   // Compute a hash from these, and store the value of ix at that position.
   inline void Store(const uint8_t *data, const uint32_t ix) {
@@ -328,7 +371,7 @@ class HashLongestMatch {
   }

   // Find a longest backward match of &data[cur_ix] up to the length of
-  // max_length.
+  // max_length and stores the position cur_ix in the hash table.
   //
   // Does not look for matches longer than max_length.
   // Does not look for matches further away than max_backward.
@@ -339,41 +382,42 @@ class HashLongestMatch {
   bool FindLongestMatch(const uint8_t * __restrict data,
                         const size_t ring_buffer_mask,
                         const int* __restrict distance_cache,
-                        const
-                        const
-                        const
-
-
-
+                        const size_t cur_ix,
+                        const size_t max_length,
+                        const size_t max_backward,
+                        size_t * __restrict best_len_out,
+                        size_t * __restrict best_len_code_out,
+                        size_t * __restrict best_distance_out,
                         double * __restrict best_score_out) {
     *best_len_code_out = 0;
     const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
     bool match_found = false;
     // Don't accept a short copy from far away.
     double best_score = *best_score_out;
-
+    size_t best_len = *best_len_out;
     *best_len_out = 0;
     // Try last distance first.
-    for (
-      const
-      const
-
+    for (size_t i = 0; i < kNumLastDistancesToCheck; ++i) {
+      const size_t idx = kDistanceCacheIndex[i];
+      const size_t backward =
+          static_cast<size_t>(distance_cache[idx] + kDistanceCacheOffset[i]);
+      size_t prev_ix = static_cast<size_t>(cur_ix - backward);
       if (prev_ix >= cur_ix) {
         continue;
       }
-      if (PREDICT_FALSE(backward >
+      if (PREDICT_FALSE(backward > max_backward)) {
         continue;
       }
-      prev_ix &=
+      prev_ix &= ring_buffer_mask;

       if (cur_ix_masked + best_len > ring_buffer_mask ||
           prev_ix + best_len > ring_buffer_mask ||
           data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
         continue;
       }
-      const
-
-
+      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                  &data[cur_ix_masked],
+                                                  max_length);
       if (len >= 3 || (len == 2 && i < 2)) {
         // Comparing for >= 2 does not change the semantics, but just saves for
         // a few unnecessary binary logarithms in backward reference score,
@@ -392,22 +436,23 @@ class HashLongestMatch {
     }
     const uint32_t key = HashBytes(&data[cur_ix_masked]);
     const uint32_t * __restrict const bucket = &buckets_[key][0];
-    const
-    for (
-
-
+    const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
+    for (size_t i = num_[key]; i > down;) {
+      --i;
+      size_t prev_ix = bucket[i & kBlockMask];
+      const size_t backward = cur_ix - prev_ix;
       if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
         break;
       }
-      prev_ix &=
+      prev_ix &= ring_buffer_mask;
       if (cur_ix_masked + best_len > ring_buffer_mask ||
           prev_ix + best_len > ring_buffer_mask ||
           data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
         continue;
       }
-      const
-
-
+      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                  &data[cur_ix_masked],
+                                                  max_length);
       if (len >= 4) {
         // Comparing for >= 3 does not change the semantics, but just saves
         // for a few unnecessary binary logarithms in backward reference
@@ -424,25 +469,28 @@ class HashLongestMatch {
         }
       }
     }
+    buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
+    ++num_[key];
     if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
-
+      size_t dict_key = Hash<14>(&data[cur_ix_masked]) << 1;
       for (int k = 0; k < 2; ++k, ++dict_key) {
         ++num_dict_lookups_;
         const uint16_t v = kStaticDictionaryHash[dict_key];
         if (v > 0) {
-          const
-          const
-          const
+          const size_t len = v & 31;
+          const size_t dist = v >> 5;
+          const size_t offset =
+              kBrotliDictionaryOffsetsByLength[len] + len * dist;
           if (len <= max_length) {
-            const
+            const size_t matchlen =
                 FindMatchLengthWithLimit(&data[cur_ix_masked],
                                          &kBrotliDictionary[offset], len);
-            if (matchlen > len
-              const
-              const
+            if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
+              const size_t transform_id = kCutoffTransforms[len - matchlen];
+              const size_t word_id =
                   transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
                   dist;
-              const
+              const size_t backward = max_backward + word_id + 1;
               double score = BackwardReferenceScore(matchlen, backward);
               if (best_score < score) {
                 ++num_dict_matches_;
@@ -462,28 +510,25 @@ class HashLongestMatch {
     return match_found;
   }

-  //
+  // Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
+  // length of max_length and stores the position cur_ix in the hash table.
   //
   // Sets *num_matches to the number of matches found, and stores the found
-  // matches in matches[0] to matches[*num_matches - 1].
-  //
-  //
-
-
-
-
-
-
-                     const int max_length,
-                     const uint32_t max_backward,
-                     int* num_matches,
-                     BackwardMatch* matches) const {
+  // matches in matches[0] to matches[*num_matches - 1]. The matches will be
+  // sorted by strictly increasing length and (non-strictly) increasing
+  // distance.
+  size_t FindAllMatches(const uint8_t* data,
+                        const size_t ring_buffer_mask,
+                        const size_t cur_ix,
+                        const size_t max_length,
+                        const size_t max_backward,
+                        BackwardMatch* matches) {
     BackwardMatch* const orig_matches = matches;
     const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
-
-
-    if (
-    for (
+    size_t best_len = 1;
+    size_t stop = cur_ix - 64;
+    if (cur_ix < 64) { stop = 0; }
+    for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {
       size_t prev_ix = i;
       const size_t backward = cur_ix - prev_ix;
       if (PREDICT_FALSE(backward > max_backward)) {
@@ -494,57 +539,57 @@ class HashLongestMatch {
           data[cur_ix_masked + 1] != data[prev_ix + 1]) {
         continue;
       }
-      const
+      const size_t len =
           FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
                                    max_length);
       if (len > best_len) {
         best_len = len;
-
-        matches = orig_matches;
-        }
-        *matches++ = BackwardMatch(static_cast<int>(backward), len);
+        *matches++ = BackwardMatch(backward, len);
       }
     }
     const uint32_t key = HashBytes(&data[cur_ix_masked]);
     const uint32_t * __restrict const bucket = &buckets_[key][0];
-    const
-    for (
-
-
+    const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
+    for (size_t i = num_[key]; i > down;) {
+      --i;
+      size_t prev_ix = bucket[i & kBlockMask];
+      const size_t backward = cur_ix - prev_ix;
       if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
         break;
       }
-      prev_ix &=
+      prev_ix &= ring_buffer_mask;
      if (cur_ix_masked + best_len > ring_buffer_mask ||
          prev_ix + best_len > ring_buffer_mask ||
          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
        continue;
      }
-      const
+      const size_t len =
          FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
                                   max_length);
      if (len > best_len) {
        best_len = len;
-        if (len > kMaxZopfliLen) {
-          matches = orig_matches;
-        }
        *matches++ = BackwardMatch(backward, len);
      }
    }
-
-
+    buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
+    ++num_[key];
+    uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
+    for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
+      dict_matches[i] = kInvalidMatch;
+    }
+    size_t minlen = std::max<size_t>(4, best_len + 1);
     if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
                                        &dict_matches[0])) {
-
-      for (
-
+      size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);
+      for (size_t l = minlen; l <= maxlen; ++l) {
+        uint32_t dict_id = dict_matches[l];
         if (dict_id < kInvalidMatch) {
           *matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
                                      dict_id & 31);
         }
       }
     }
-
+    return static_cast<size_t>(matches - orig_matches);
   }

   enum { kHashLength = 4 };
@@ -560,6 +605,10 @@ class HashLongestMatch {
     return h >> (32 - kBucketBits);
   }

+  enum { kHashMapSize = 2 << kBucketBits };
+
+  static const size_t kMaxNumMatches = 64 + (1 << kBlockBits);
+
  private:
   // Number of hash buckets.
   static const uint32_t kBucketSize = 1 << kBucketBits;
@@ -577,29 +626,281 @@ class HashLongestMatch {
   // Buckets containing kBlockSize of backward references.
   uint32_t buckets_[kBucketSize][kBlockSize];

+  // True if num_ array needs to be initialized.
+  bool need_init_;
+
   size_t num_dict_lookups_;
   size_t num_dict_matches_;
 };

+// A (forgetful) hash table where each hash bucket contains a binary tree of
+// sequences whose first 4 bytes share the same hash code.
+// Each sequence is kMaxTreeCompLength long and is identified by its starting
+// position in the input data. The binary tree is sorted by the lexicographic
+// order of the sequences, and it is also a max-heap with respect to the
+// starting positions.
+class HashToBinaryTree {
+ public:
+  HashToBinaryTree() : forest_(NULL) {
+    Reset();
+  }
+
+  ~HashToBinaryTree() {
+    delete[] forest_;
+  }
+
+  void Reset() {
+    need_init_ = true;
+  }
+
+  void Init(int lgwin, size_t position, size_t bytes, bool is_last) {
+    if (need_init_) {
+      window_mask_ = (1u << lgwin) - 1u;
+      invalid_pos_ = static_cast<uint32_t>(-window_mask_);
+      for (uint32_t i = 0; i < kBucketSize; i++) {
+        buckets_[i] = invalid_pos_;
+      }
+      size_t num_nodes = (position == 0 && is_last) ? bytes : window_mask_ + 1;
+      forest_ = new uint32_t[2 * num_nodes];
+      need_init_ = false;
+    }
+  }
+
+  // Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
+  // length of max_length and stores the position cur_ix in the hash table.
+  //
+  // Sets *num_matches to the number of matches found, and stores the found
+  // matches in matches[0] to matches[*num_matches - 1]. The matches will be
+  // sorted by strictly increasing length and (non-strictly) increasing
+  // distance.
+  size_t FindAllMatches(const uint8_t* data,
+                        const size_t ring_buffer_mask,
+                        const size_t cur_ix,
+                        const size_t max_length,
+                        const size_t max_backward,
+                        BackwardMatch* matches) {
+    BackwardMatch* const orig_matches = matches;
+    const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+    size_t best_len = 1;
+    size_t stop = cur_ix - 64;
+    if (cur_ix < 64) { stop = 0; }
+    for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {
+      size_t prev_ix = i;
+      const size_t backward = cur_ix - prev_ix;
+      if (PREDICT_FALSE(backward > max_backward)) {
+        break;
+      }
+      prev_ix &= ring_buffer_mask;
+      if (data[cur_ix_masked] != data[prev_ix] ||
+          data[cur_ix_masked + 1] != data[prev_ix + 1]) {
+        continue;
+      }
+      const size_t len =
+          FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
+                                   max_length);
+      if (len > best_len) {
+        best_len = len;
+        *matches++ = BackwardMatch(backward, len);
+      }
+    }
+    if (best_len < max_length) {
+      matches = StoreAndFindMatches(data, cur_ix, ring_buffer_mask,
+                                    max_length, &best_len, matches);
+    }
+    uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
+    for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
+      dict_matches[i] = kInvalidMatch;
+    }
+    size_t minlen = std::max<size_t>(4, best_len + 1);
+    if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
+                                       &dict_matches[0])) {
+      size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);
+      for (size_t l = minlen; l <= maxlen; ++l) {
+        uint32_t dict_id = dict_matches[l];
+        if (dict_id < kInvalidMatch) {
+          *matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
+                                     dict_id & 31);
+        }
+      }
+    }
+    return static_cast<size_t>(matches - orig_matches);
+  }
+
+  // Stores the hash of the next 4 bytes and re-roots the binary tree at the
+  // current sequence, without returning any matches.
+  // REQUIRES: cur_ix + kMaxTreeCompLength <= end-of-current-block
+  void Store(const uint8_t* data,
+             const size_t ring_buffer_mask,
+             const size_t cur_ix) {
+    size_t best_len = 0;
+    StoreAndFindMatches(data, cur_ix, ring_buffer_mask, kMaxTreeCompLength,
+                        &best_len, NULL);
+  }
+
+  void StitchToPreviousBlock(size_t num_bytes,
+                             size_t position,
+                             const uint8_t* ringbuffer,
+                             size_t ringbuffer_mask) {
+    if (num_bytes >= 3 && position >= kMaxTreeCompLength) {
+      // Store the last `kMaxTreeCompLength - 1` positions in the hasher.
+      // These could not be calculated before, since they require knowledge
+      // of both the previous and the current block.
+      const size_t i_start = position - kMaxTreeCompLength + 1;
+      const size_t i_end = std::min(position, i_start + num_bytes);
+      for (size_t i = i_start; i < i_end; ++i) {
+        // We know that i + kMaxTreeCompLength <= position + num_bytes, i.e. the
+        // end of the current block and that we have at least
+        // kMaxTreeCompLength tail in the ringbuffer.
+        Store(ringbuffer, ringbuffer_mask, i);
+      }
+    }
+  }
+
+  static const size_t kMaxNumMatches = 64 + kMaxTreeSearchDepth;
+
+ private:
+  // Stores the hash of the next 4 bytes and in a single tree-traversal, the
+  // hash bucket's binary tree is searched for matches and is re-rooted at the
+  // current position.
+  //
+  // If less than kMaxTreeCompLength data is available, the hash bucket of the
+  // current position is searched for matches, but the state of the hash table
+  // is not changed, since we can not know the final sorting order of the
+  // current (incomplete) sequence.
+  //
+  // This function must be called with increasing cur_ix positions.
+  BackwardMatch* StoreAndFindMatches(const uint8_t* const __restrict data,
+                                     const size_t cur_ix,
+                                     const size_t ring_buffer_mask,
+                                     const size_t max_length,
+                                     size_t* const __restrict best_len,
+                                     BackwardMatch* __restrict matches) {
+    const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+    const size_t max_backward = window_mask_ - 15;
+    const size_t max_comp_len = std::min(max_length, kMaxTreeCompLength);
+    const bool reroot_tree = max_length >= kMaxTreeCompLength;
+    const uint32_t key = HashBytes(&data[cur_ix_masked]);
+    size_t prev_ix = buckets_[key];
+    // The forest index of the rightmost node of the left subtree of the new
+    // root, updated as we traverse and reroot the tree of the hash bucket.
+    size_t node_left = LeftChildIndex(cur_ix);
+    // The forest index of the leftmost node of the right subtree of the new
+    // root, updated as we traverse and reroot the tree of the hash bucket.
+    size_t node_right = RightChildIndex(cur_ix);
+    // The match length of the rightmost node of the left subtree of the new
+    // root, updated as we traverse and reroot the tree of the hash bucket.
+    size_t best_len_left = 0;
+    // The match length of the leftmost node of the right subtree of the new
+    // root, updated as we traverse and reroot the tree of the hash bucket.
+    size_t best_len_right = 0;
+    if (reroot_tree) {
+      buckets_[key] = static_cast<uint32_t>(cur_ix);
+    }
+    for (size_t depth_remaining = kMaxTreeSearchDepth; ; --depth_remaining) {
+      const size_t backward = cur_ix - prev_ix;
+      const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
+      if (backward == 0 || backward > max_backward || depth_remaining == 0) {
+        if (reroot_tree) {
+          forest_[node_left] = invalid_pos_;
+          forest_[node_right] = invalid_pos_;
+        }
+        break;
+      }
+      const size_t cur_len = std::min(best_len_left, best_len_right);
+      const size_t len = cur_len +
+          FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],
+                                   &data[prev_ix_masked + cur_len],
+                                   max_length - cur_len);
+      if (len > *best_len) {
+        *best_len = len;
+        if (matches) {
+          *matches++ = BackwardMatch(backward, len);
+        }
+        if (len >= max_comp_len) {
+          if (reroot_tree) {
+            forest_[node_left] = forest_[LeftChildIndex(prev_ix)];
+            forest_[node_right] = forest_[RightChildIndex(prev_ix)];
+          }
+          break;
+        }
+      }
+      if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {
+        best_len_left = len;
+        if (reroot_tree) {
+          forest_[node_left] = static_cast<uint32_t>(prev_ix);
+        }
+        node_left = RightChildIndex(prev_ix);
+        prev_ix = forest_[node_left];
+      } else {
+        best_len_right = len;
+        if (reroot_tree) {
+          forest_[node_right] = static_cast<uint32_t>(prev_ix);
+        }
+        node_right = LeftChildIndex(prev_ix);
+        prev_ix = forest_[node_right];
+      }
+    }
+    return matches;
+  }
+
+  inline size_t LeftChildIndex(const size_t pos) {
+    return 2 * (pos & window_mask_);
+  }
+
+  inline size_t RightChildIndex(const size_t pos) {
+    return 2 * (pos & window_mask_) + 1;
+  }
+
+  static uint32_t HashBytes(const uint8_t *data) {
+    uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
+    // The higher bits contain more mixture from the multiplication,
+    // so we take our results from there.
+    return h >> (32 - kBucketBits);
+  }
+
+  static const int kBucketBits = 17;
+  static const size_t kBucketSize = 1 << kBucketBits;
+
+  // The window size minus 1
+  size_t window_mask_;
+
+  // Hash table that maps the 4-byte hashes of the sequence to the last
+  // position where this hash was found, which is the root of the binary
+  // tree of sequences that share this hash bucket.
+  uint32_t buckets_[kBucketSize];
+
+  // The union of the binary trees of each hash bucket. The root of the tree
+  // corresponding to a hash is a sequence starting at buckets_[hash] and
+  // the left and right children of a sequence starting at pos are
+  // forest_[2 * pos] and forest_[2 * pos + 1].
+  uint32_t* forest_;
+
+  // A position used to mark a non-existent sequence, i.e. a tree is empty if
+  // its root is at invalid_pos_ and a node is a leaf if both its children
+  // are at invalid_pos_.
+  uint32_t invalid_pos_;
+
+  bool need_init_;
+};
+
 struct Hashers {
   // For kBucketSweep == 1, enabling the dictionary lookup makes compression
   // a little faster (0.5% - 1%) and it compresses 0.15% better on small text
   // and html inputs.
-  typedef HashLongestMatchQuickly<16, 1, true>
-  typedef HashLongestMatchQuickly<16, 2, false>
-  typedef HashLongestMatchQuickly<16, 4, false> H3;
+  typedef HashLongestMatchQuickly<16, 1, true> H2;
+  typedef HashLongestMatchQuickly<16, 2, false> H3;
   typedef HashLongestMatchQuickly<17, 4, true> H4;
   typedef HashLongestMatch<14, 4, 4> H5;
   typedef HashLongestMatch<14, 5, 4> H6;
   typedef HashLongestMatch<15, 6, 10> H7;
   typedef HashLongestMatch<15, 7, 10> H8;
   typedef HashLongestMatch<15, 8, 16> H9;
+  typedef HashToBinaryTree H10;

-  Hashers() :
-
+  Hashers(void) : hash_h2(0), hash_h3(0), hash_h4(0), hash_h5(0),
+                  hash_h6(0), hash_h7(0), hash_h8(0), hash_h9(0), hash_h10(0) {}

-  ~Hashers() {
-    delete hash_h1;
+  ~Hashers(void) {
     delete hash_h2;
     delete hash_h3;
     delete hash_h4;
@@ -608,11 +909,11 @@ struct Hashers {
     delete hash_h7;
     delete hash_h8;
     delete hash_h9;
+    delete hash_h10;
   }

   void Init(int type) {
     switch (type) {
-      case 1: hash_h1 = new H1; break;
       case 2: hash_h2 = new H2; break;
       case 3: hash_h3 = new H3; break;
       case 4: hash_h4 = new H4; break;
@@ -621,12 +922,14 @@ struct Hashers {
       case 7: hash_h7 = new H7; break;
       case 8: hash_h8 = new H8; break;
       case 9: hash_h9 = new H9; break;
+      case 10: hash_h10 = new H10; break;
       default: break;
     }
   }

   template<typename Hasher>
   void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {
+    hasher->Init();
     for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) {
       hasher->Store(&dict[i], static_cast<uint32_t>(i));
     }
@@ -634,9 +937,8 @@ struct Hashers {

   // Custom LZ77 window.
   void PrependCustomDictionary(
-      int type, const size_t size, const uint8_t* dict) {
+      int type, int lgwin, const size_t size, const uint8_t* dict) {
     switch (type) {
-      case 1: WarmupHash(size, dict, hash_h1); break;
       case 2: WarmupHash(size, dict, hash_h2); break;
       case 3: WarmupHash(size, dict, hash_h3); break;
       case 4: WarmupHash(size, dict, hash_h4); break;
@@ -645,12 +947,17 @@ struct Hashers {
       case 7: WarmupHash(size, dict, hash_h7); break;
       case 8: WarmupHash(size, dict, hash_h8); break;
       case 9: WarmupHash(size, dict, hash_h9); break;
+      case 10:
+        hash_h10->Init(lgwin, 0, size, false);
+        for (size_t i = 0; i + kMaxTreeCompLength - 1 < size; ++i) {
+          hash_h10->Store(dict, std::numeric_limits<size_t>::max(), i);
+        }
+        break;
       default: break;
     }
   }


-  H1* hash_h1;
   H2* hash_h2;
   H3* hash_h3;
   H4* hash_h4;
@@ -659,6 +966,7 @@ struct Hashers {
   H7* hash_h7;
   H8* hash_h8;
   H9* hash_h9;
+  H10* hash_h10;
 };

 } // namespace brotli
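
Two of the smaller building blocks in the rewritten hash.h above are easy to sanity-check in isolation. The sketch below restates the new BackwardReferenceScore formula and the BackwardMatch length/length-code packing from the diff as a standalone program; the local Log2FloorNonZero is a stand-in for the helper that hash.h pulls in from fast_log.h, and the concrete inputs are illustrative only, not part of the gem:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Local stand-in for brotli's Log2FloorNonZero: floor(log2(n)) for n > 0.
static uint32_t Log2FloorNonZero(size_t n) {
  uint32_t result = 0;
  while (n >>= 1) ++result;
  return result;
}

// The updated score: 5.4 bits of assumed gain per copied byte, minus a
// penalty of 1.20 bits per doubling of the backward offset.
static double BackwardReferenceScore(size_t copy_length,
                                     size_t backward_reference_offset) {
  return 5.4 * static_cast<double>(copy_length) -
      1.20 * Log2FloorNonZero(backward_reference_offset);
}

// BackwardMatch packs length and length code into one 32-bit word:
// bits 5 and up hold the length, bits 0-4 hold the code (0 when equal).
struct BackwardMatch {
  BackwardMatch(size_t dist, size_t len, size_t len_code)
      : distance(static_cast<uint32_t>(dist)),
        length_and_code(static_cast<uint32_t>(
            (len << 5) | (len == len_code ? 0 : len_code))) {}
  size_t length() const { return length_and_code >> 5; }
  size_t length_code() const {
    size_t code = length_and_code & 31;
    return code ? code : length();
  }
  uint32_t distance;
  uint32_t length_and_code;
};

int main() {
  // A 12-byte copy from 1 KiB back scores 5.4 * 12 - 1.20 * 10 = 52.8;
  // the same copy from 64 KiB back scores 5.4 * 12 - 1.20 * 16 = 45.6,
  // so nearer matches win when lengths are equal.
  std::printf("%.1f\n", BackwardReferenceScore(12, 1024));   // 52.8
  std::printf("%.1f\n", BackwardReferenceScore(12, 65536));  // 45.6

  BackwardMatch m(1024, 12, 9);
  assert(m.length() == 12 && m.length_code() == 9);
  return 0;
}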
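
The static-dictionary lookups in both FindLongestMatch variants also switched from an exact-length test to a cutoff window: a dictionary word of length len is now usable when matchlen > 0 of its bytes match and the unmatched tail is shorter than kCutoffTransformsCount, with kCutoffTransforms[len - matchlen] selecting the transform that handles that tail. A standalone check of the index arithmetic, using the table from the diff (size_bits and dist here are made-up example values; the real per-length tables live in the vendored dictionary sources):

#include <cassert>
#include <cstddef>
#include <cstdint>

static const uint32_t kCutoffTransformsCount = 10;
static const uint8_t kCutoffTransforms[] = {
  0, 12, 27, 23, 42, 63, 56, 48, 59, 64
};

int main() {
  const size_t len = 8;       // length of the dictionary word
  const size_t matchlen = 4;  // bytes of it that actually matched
  // New acceptance test from the diff: the 4-byte shortfall is within
  // the 10 supported cutoff transforms, and something matched at all.
  assert(matchlen + kCutoffTransformsCount > len && matchlen > 0);

  // Hypothetical stand-in for kBrotliDictionarySizeBitsByLength[len].
  const size_t size_bits = 10;
  const size_t dist = 17;  // example slot of the word in its length class
  // Each transform gets its own copy of the length class in the virtual
  // word-id space, so word_id = transform * class_size + slot, and the
  // match is then addressed just beyond the sliding window as
  // backward = max_backward + word_id + 1 (as in the diff).
  const size_t transform_id = kCutoffTransforms[len - matchlen];
  const size_t word_id = transform_id * (1u << size_bits) + dist;
  assert(transform_id == 42 && word_id == 42 * 1024 + 17);
  return 0;
}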