brotli 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/main.yml +34 -0
- data/.github/workflows/publish.yml +34 -0
- data/Gemfile +6 -2
- data/Rakefile +18 -6
- data/bin/before_install.sh +9 -0
- data/brotli.gemspec +7 -13
- data/ext/brotli/brotli.c +209 -11
- data/ext/brotli/buffer.c +1 -7
- data/ext/brotli/buffer.h +1 -1
- data/ext/brotli/extconf.rb +45 -26
- data/lib/brotli/version.rb +1 -1
- data/smoke.sh +1 -1
- data/test/brotli_test.rb +104 -0
- data/test/brotli_writer_test.rb +36 -0
- data/test/test_helper.rb +8 -0
- data/vendor/brotli/c/common/constants.c +15 -0
- data/vendor/brotli/c/common/constants.h +149 -6
- data/vendor/brotli/c/{dec/context.h → common/context.c} +91 -186
- data/vendor/brotli/c/common/context.h +113 -0
- data/vendor/brotli/c/common/dictionary.bin +0 -0
- data/vendor/brotli/c/common/dictionary.bin.br +0 -0
- data/vendor/brotli/c/common/dictionary.c +11 -2
- data/vendor/brotli/c/common/dictionary.h +4 -4
- data/vendor/brotli/c/common/platform.c +22 -0
- data/vendor/brotli/c/common/platform.h +594 -0
- data/vendor/brotli/c/common/transform.c +291 -0
- data/vendor/brotli/c/common/transform.h +85 -0
- data/vendor/brotli/c/common/version.h +8 -1
- data/vendor/brotli/c/dec/bit_reader.c +29 -1
- data/vendor/brotli/c/dec/bit_reader.h +91 -100
- data/vendor/brotli/c/dec/decode.c +665 -437
- data/vendor/brotli/c/dec/huffman.c +65 -84
- data/vendor/brotli/c/dec/huffman.h +67 -14
- data/vendor/brotli/c/dec/prefix.h +1 -20
- data/vendor/brotli/c/dec/state.c +32 -45
- data/vendor/brotli/c/dec/state.h +173 -55
- data/vendor/brotli/c/enc/backward_references.c +27 -16
- data/vendor/brotli/c/enc/backward_references.h +7 -7
- data/vendor/brotli/c/enc/backward_references_hq.c +155 -116
- data/vendor/brotli/c/enc/backward_references_hq.h +22 -23
- data/vendor/brotli/c/enc/backward_references_inc.h +32 -22
- data/vendor/brotli/c/enc/bit_cost.c +1 -1
- data/vendor/brotli/c/enc/bit_cost.h +5 -5
- data/vendor/brotli/c/enc/block_encoder_inc.h +7 -6
- data/vendor/brotli/c/enc/block_splitter.c +5 -6
- data/vendor/brotli/c/enc/block_splitter.h +1 -1
- data/vendor/brotli/c/enc/block_splitter_inc.h +26 -17
- data/vendor/brotli/c/enc/brotli_bit_stream.c +107 -123
- data/vendor/brotli/c/enc/brotli_bit_stream.h +19 -38
- data/vendor/brotli/c/enc/cluster.c +1 -1
- data/vendor/brotli/c/enc/cluster.h +1 -1
- data/vendor/brotli/c/enc/cluster_inc.h +6 -3
- data/vendor/brotli/c/enc/command.c +28 -0
- data/vendor/brotli/c/enc/command.h +52 -42
- data/vendor/brotli/c/enc/compress_fragment.c +21 -22
- data/vendor/brotli/c/enc/compress_fragment.h +1 -1
- data/vendor/brotli/c/enc/compress_fragment_two_pass.c +102 -69
- data/vendor/brotli/c/enc/compress_fragment_two_pass.h +1 -1
- data/vendor/brotli/c/enc/dictionary_hash.c +1827 -1101
- data/vendor/brotli/c/enc/dictionary_hash.h +2 -1
- data/vendor/brotli/c/enc/encode.c +358 -195
- data/vendor/brotli/c/enc/encoder_dict.c +33 -0
- data/vendor/brotli/c/enc/encoder_dict.h +43 -0
- data/vendor/brotli/c/enc/entropy_encode.c +16 -14
- data/vendor/brotli/c/enc/entropy_encode.h +7 -7
- data/vendor/brotli/c/enc/entropy_encode_static.h +3 -3
- data/vendor/brotli/c/enc/fast_log.c +105 -0
- data/vendor/brotli/c/enc/fast_log.h +20 -99
- data/vendor/brotli/c/enc/find_match_length.h +5 -6
- data/vendor/brotli/c/enc/hash.h +145 -103
- data/vendor/brotli/c/enc/hash_composite_inc.h +125 -0
- data/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +93 -53
- data/vendor/brotli/c/enc/hash_longest_match64_inc.h +54 -53
- data/vendor/brotli/c/enc/hash_longest_match_inc.h +58 -54
- data/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +95 -63
- data/vendor/brotli/c/enc/hash_rolling_inc.h +212 -0
- data/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +46 -43
- data/vendor/brotli/c/enc/histogram.c +9 -6
- data/vendor/brotli/c/enc/histogram.h +6 -3
- data/vendor/brotli/c/enc/histogram_inc.h +1 -1
- data/vendor/brotli/c/enc/literal_cost.c +5 -5
- data/vendor/brotli/c/enc/literal_cost.h +2 -2
- data/vendor/brotli/c/enc/memory.c +5 -16
- data/vendor/brotli/c/enc/memory.h +52 -1
- data/vendor/brotli/c/enc/metablock.c +171 -36
- data/vendor/brotli/c/enc/metablock.h +13 -8
- data/vendor/brotli/c/enc/metablock_inc.h +2 -2
- data/vendor/brotli/c/enc/params.h +46 -0
- data/vendor/brotli/c/enc/prefix.h +3 -4
- data/vendor/brotli/c/enc/quality.h +29 -24
- data/vendor/brotli/c/enc/ringbuffer.h +19 -12
- data/vendor/brotli/c/enc/static_dict.c +49 -45
- data/vendor/brotli/c/enc/static_dict.h +4 -3
- data/vendor/brotli/c/enc/static_dict_lut.h +1 -1
- data/vendor/brotli/c/enc/utf8_util.c +21 -21
- data/vendor/brotli/c/enc/utf8_util.h +1 -1
- data/vendor/brotli/c/enc/write_bits.h +35 -38
- data/vendor/brotli/c/include/brotli/decode.h +13 -8
- data/vendor/brotli/c/include/brotli/encode.h +54 -8
- data/vendor/brotli/c/include/brotli/port.h +225 -83
- data/vendor/brotli/c/include/brotli/types.h +0 -7
- metadata +28 -87
- data/.travis.yml +0 -30
- data/spec/brotli_spec.rb +0 -88
- data/spec/inflate_spec.rb +0 -75
- data/spec/spec_helper.rb +0 -4
- data/vendor/brotli/c/dec/port.h +0 -168
- data/vendor/brotli/c/dec/transform.h +0 -300
- data/vendor/brotli/c/enc/context.h +0 -184
- data/vendor/brotli/c/enc/port.h +0 -184
@@ -5,15 +5,16 @@
|
|
5
5
|
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
6
6
|
*/
|
7
7
|
|
8
|
-
/* template parameters: FN, BUCKET_BITS,
|
8
|
+
/* template parameters: FN, BUCKET_BITS, BUCKET_SWEEP_BITS, HASH_LEN,
|
9
9
|
USE_DICTIONARY
|
10
10
|
*/
|
11
11
|
|
12
12
|
#define HashLongestMatchQuickly HASHER()
|
13
13
|
|
14
14
|
#define BUCKET_SIZE (1 << BUCKET_BITS)
|
15
|
-
|
16
|
-
#define
|
15
|
+
#define BUCKET_MASK (BUCKET_SIZE - 1)
|
16
|
+
#define BUCKET_SWEEP (1 << BUCKET_SWEEP_BITS)
|
17
|
+
#define BUCKET_SWEEP_MASK ((BUCKET_SWEEP - 1) << 3)
|
17
18
|
|
18
19
|
static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
|
19
20
|
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
|
@@ -32,39 +33,50 @@ static uint32_t FN(HashBytes)(const uint8_t* data) {
|
|
32
33
|
/* A (forgetful) hash table to the data seen by the compressor, to
|
33
34
|
help create backward references to previous data.
|
34
35
|
|
35
|
-
This is a hash map of fixed size (BUCKET_SIZE).
|
36
|
-
given index, BUCKET_SWEEP buckets are used to store values of a key. */
|
36
|
+
This is a hash map of fixed size (BUCKET_SIZE). */
|
37
37
|
typedef struct HashLongestMatchQuickly {
|
38
|
-
|
39
|
-
|
38
|
+
/* Shortcuts. */
|
39
|
+
HasherCommon* common;
|
40
40
|
|
41
|
-
|
42
|
-
|
43
|
-
|
41
|
+
/* --- Dynamic size members --- */
|
42
|
+
|
43
|
+
uint32_t* buckets_; /* uint32_t[BUCKET_SIZE]; */
|
44
|
+
} HashLongestMatchQuickly;
|
44
45
|
|
45
46
|
static void FN(Initialize)(
|
46
|
-
|
47
|
-
|
47
|
+
HasherCommon* common, HashLongestMatchQuickly* BROTLI_RESTRICT self,
|
48
|
+
const BrotliEncoderParams* params) {
|
49
|
+
self->common = common;
|
50
|
+
|
48
51
|
BROTLI_UNUSED(params);
|
52
|
+
self->buckets_ = (uint32_t*)common->extra;
|
49
53
|
}
|
50
54
|
|
51
|
-
static void FN(Prepare)(
|
52
|
-
|
53
|
-
|
55
|
+
static void FN(Prepare)(
|
56
|
+
HashLongestMatchQuickly* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
|
57
|
+
size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
|
58
|
+
uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
|
54
59
|
/* Partial preparation is 100 times slower (per socket). */
|
55
|
-
size_t partial_prepare_threshold =
|
60
|
+
size_t partial_prepare_threshold = BUCKET_SIZE >> 5;
|
56
61
|
if (one_shot && input_size <= partial_prepare_threshold) {
|
57
62
|
size_t i;
|
58
63
|
for (i = 0; i < input_size; ++i) {
|
59
64
|
const uint32_t key = FN(HashBytes)(&data[i]);
|
60
|
-
|
65
|
+
if (BUCKET_SWEEP == 1) {
|
66
|
+
buckets[key] = 0;
|
67
|
+
} else {
|
68
|
+
uint32_t j;
|
69
|
+
for (j = 0; j < BUCKET_SWEEP; ++j) {
|
70
|
+
buckets[(key + (j << 3)) & BUCKET_MASK] = 0;
|
71
|
+
}
|
72
|
+
}
|
61
73
|
}
|
62
74
|
} else {
|
63
75
|
/* It is not strictly necessary to fill this buffer here, but
|
64
76
|
not filling will make the results of the compression stochastic
|
65
77
|
(but correct). This is because random data would cause the
|
66
78
|
system to find accidentally good backward references here and there. */
|
67
|
-
memset(
|
79
|
+
memset(buckets, 0, sizeof(uint32_t) * BUCKET_SIZE);
|
68
80
|
}
|
69
81
|
}
|
70
82
|
|
@@ -74,45 +86,53 @@ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
|
|
74
86
|
BROTLI_UNUSED(params);
|
75
87
|
BROTLI_UNUSED(one_shot);
|
76
88
|
BROTLI_UNUSED(input_size);
|
77
|
-
return sizeof(
|
89
|
+
return sizeof(uint32_t) * BUCKET_SIZE;
|
78
90
|
}
|
79
91
|
|
80
92
|
/* Look at 5 bytes at &data[ix & mask].
|
81
93
|
Compute a hash from these, and store the value somewhere within
|
82
94
|
[ix .. ix+3]. */
|
83
|
-
static BROTLI_INLINE void FN(Store)(
|
84
|
-
|
95
|
+
static BROTLI_INLINE void FN(Store)(
|
96
|
+
HashLongestMatchQuickly* BROTLI_RESTRICT self,
|
97
|
+
const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
|
85
98
|
const uint32_t key = FN(HashBytes)(&data[ix & mask]);
|
86
|
-
|
87
|
-
|
88
|
-
|
99
|
+
if (BUCKET_SWEEP == 1) {
|
100
|
+
self->buckets_[key] = (uint32_t)ix;
|
101
|
+
} else {
|
102
|
+
/* Wiggle the value with the bucket sweep range. */
|
103
|
+
const uint32_t off = ix & BUCKET_SWEEP_MASK;
|
104
|
+
self->buckets_[(key + off) & BUCKET_MASK] = (uint32_t)ix;
|
105
|
+
}
|
89
106
|
}
|
90
107
|
|
91
|
-
static BROTLI_INLINE void FN(StoreRange)(
|
92
|
-
|
93
|
-
const size_t
|
108
|
+
static BROTLI_INLINE void FN(StoreRange)(
|
109
|
+
HashLongestMatchQuickly* BROTLI_RESTRICT self,
|
110
|
+
const uint8_t* BROTLI_RESTRICT data, const size_t mask,
|
111
|
+
const size_t ix_start, const size_t ix_end) {
|
94
112
|
size_t i;
|
95
113
|
for (i = ix_start; i < ix_end; ++i) {
|
96
|
-
FN(Store)(
|
114
|
+
FN(Store)(self, data, mask, i);
|
97
115
|
}
|
98
116
|
}
|
99
117
|
|
100
118
|
static BROTLI_INLINE void FN(StitchToPreviousBlock)(
|
101
|
-
|
119
|
+
HashLongestMatchQuickly* BROTLI_RESTRICT self,
|
120
|
+
size_t num_bytes, size_t position,
|
102
121
|
const uint8_t* ringbuffer, size_t ringbuffer_mask) {
|
103
122
|
if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
|
104
123
|
/* Prepare the hashes for three last bytes of the last write.
|
105
124
|
These could not be calculated before, since they require knowledge
|
106
125
|
of both the previous and the current block. */
|
107
|
-
FN(Store)(
|
108
|
-
FN(Store)(
|
109
|
-
FN(Store)(
|
126
|
+
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
|
127
|
+
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
|
128
|
+
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
|
110
129
|
}
|
111
130
|
}
|
112
131
|
|
113
132
|
static BROTLI_INLINE void FN(PrepareDistanceCache)(
|
114
|
-
|
115
|
-
|
133
|
+
HashLongestMatchQuickly* BROTLI_RESTRICT self,
|
134
|
+
int* BROTLI_RESTRICT distance_cache) {
|
135
|
+
BROTLI_UNUSED(self);
|
116
136
|
BROTLI_UNUSED(distance_cache);
|
117
137
|
}
|
118
138
|
|
@@ -125,16 +145,19 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)(
|
|
125
145
|
Writes the best match into |out|.
|
126
146
|
|out|->score is updated only if a better match is found. */
|
127
147
|
static BROTLI_INLINE void FN(FindLongestMatch)(
|
128
|
-
|
129
|
-
const
|
148
|
+
HashLongestMatchQuickly* BROTLI_RESTRICT self,
|
149
|
+
const BrotliEncoderDictionary* dictionary,
|
150
|
+
const uint8_t* BROTLI_RESTRICT data,
|
130
151
|
const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
|
131
152
|
const size_t cur_ix, const size_t max_length, const size_t max_backward,
|
132
|
-
const size_t
|
133
|
-
|
153
|
+
const size_t dictionary_distance, const size_t max_distance,
|
154
|
+
HasherSearchResult* BROTLI_RESTRICT out) {
|
155
|
+
uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
|
134
156
|
const size_t best_len_in = out->len;
|
135
157
|
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
136
|
-
const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
|
137
158
|
int compare_char = data[cur_ix_masked + best_len_in];
|
159
|
+
size_t key = FN(HashBytes)(&data[cur_ix_masked]);
|
160
|
+
size_t key_out;
|
138
161
|
score_t min_score = out->score;
|
139
162
|
score_t best_score = out->score;
|
140
163
|
size_t best_len = best_len_in;
|
@@ -144,21 +167,21 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
|
|
144
167
|
if (prev_ix < cur_ix) {
|
145
168
|
prev_ix &= (uint32_t)ring_buffer_mask;
|
146
169
|
if (compare_char == data[prev_ix + best_len]) {
|
147
|
-
size_t len = FindMatchLengthWithLimit(
|
148
|
-
|
149
|
-
max_length);
|
170
|
+
const size_t len = FindMatchLengthWithLimit(
|
171
|
+
&data[prev_ix], &data[cur_ix_masked], max_length);
|
150
172
|
if (len >= 4) {
|
151
173
|
const score_t score = BackwardReferenceScoreUsingLastDistance(len);
|
152
174
|
if (best_score < score) {
|
153
|
-
best_score = score;
|
154
|
-
best_len = len;
|
155
175
|
out->len = len;
|
156
176
|
out->distance = cached_backward;
|
157
|
-
out->score =
|
158
|
-
compare_char = data[cur_ix_masked + best_len];
|
177
|
+
out->score = score;
|
159
178
|
if (BUCKET_SWEEP == 1) {
|
160
|
-
|
179
|
+
buckets[key] = (uint32_t)cur_ix;
|
161
180
|
return;
|
181
|
+
} else {
|
182
|
+
best_len = len;
|
183
|
+
best_score = score;
|
184
|
+
compare_char = data[cur_ix_masked + len];
|
162
185
|
}
|
163
186
|
}
|
164
187
|
}
|
@@ -168,8 +191,8 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
|
|
168
191
|
size_t backward;
|
169
192
|
size_t len;
|
170
193
|
/* Only one to look for, don't bother to prepare for a loop. */
|
171
|
-
prev_ix =
|
172
|
-
|
194
|
+
prev_ix = buckets[key];
|
195
|
+
buckets[key] = (uint32_t)cur_ix;
|
173
196
|
backward = cur_ix - prev_ix;
|
174
197
|
prev_ix &= (uint32_t)ring_buffer_mask;
|
175
198
|
if (compare_char != data[prev_ix + best_len_in]) {
|
@@ -191,12 +214,17 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
|
|
191
214
|
}
|
192
215
|
}
|
193
216
|
} else {
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
217
|
+
size_t keys[BUCKET_SWEEP];
|
218
|
+
size_t i;
|
219
|
+
for (i = 0; i < BUCKET_SWEEP; ++i) {
|
220
|
+
keys[i] = (key + (i << 3)) & BUCKET_MASK;
|
221
|
+
}
|
222
|
+
key_out = keys[(cur_ix & BUCKET_SWEEP_MASK) >> 3];
|
223
|
+
for (i = 0; i < BUCKET_SWEEP; ++i) {
|
199
224
|
size_t len;
|
225
|
+
size_t backward;
|
226
|
+
prev_ix = buckets[keys[i]];
|
227
|
+
backward = cur_ix - prev_ix;
|
200
228
|
prev_ix &= (uint32_t)ring_buffer_mask;
|
201
229
|
if (compare_char != data[prev_ix + best_len]) {
|
202
230
|
continue;
|
@@ -210,25 +238,29 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
|
|
210
238
|
if (len >= 4) {
|
211
239
|
const score_t score = BackwardReferenceScore(len, backward);
|
212
240
|
if (best_score < score) {
|
213
|
-
best_score = score;
|
214
241
|
best_len = len;
|
215
|
-
out->len =
|
216
|
-
|
242
|
+
out->len = len;
|
243
|
+
compare_char = data[cur_ix_masked + len];
|
244
|
+
best_score = score;
|
217
245
|
out->score = score;
|
218
|
-
|
246
|
+
out->distance = backward;
|
219
247
|
}
|
220
248
|
}
|
221
249
|
}
|
222
250
|
}
|
223
251
|
if (USE_DICTIONARY && min_score == out->score) {
|
224
|
-
SearchInStaticDictionary(dictionary,
|
225
|
-
|
226
|
-
BROTLI_TRUE);
|
252
|
+
SearchInStaticDictionary(dictionary,
|
253
|
+
self->common, &data[cur_ix_masked], max_length, dictionary_distance,
|
254
|
+
max_distance, out, BROTLI_TRUE);
|
255
|
+
}
|
256
|
+
if (BUCKET_SWEEP != 1) {
|
257
|
+
buckets[key_out] = (uint32_t)cur_ix;
|
227
258
|
}
|
228
|
-
self->buckets_[key + ((cur_ix >> 3) % BUCKET_SWEEP)] = (uint32_t)cur_ix;
|
229
259
|
}
|
230
260
|
|
231
|
-
#undef
|
261
|
+
#undef BUCKET_SWEEP_MASK
|
262
|
+
#undef BUCKET_SWEEP
|
263
|
+
#undef BUCKET_MASK
|
232
264
|
#undef BUCKET_SIZE
|
233
265
|
|
234
266
|
#undef HashLongestMatchQuickly
|
@@ -0,0 +1,212 @@
|
|
1
|
+
/* NOLINT(build/header_guard) */
|
2
|
+
/* Copyright 2018 Google Inc. All Rights Reserved.
|
3
|
+
|
4
|
+
Distributed under MIT license.
|
5
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
6
|
+
*/
|
7
|
+
|
8
|
+
/* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */
|
9
|
+
/* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */
|
10
|
+
/* JUMP = skip bytes for speedup */
|
11
|
+
|
12
|
+
/* Rolling hash for long distance long string matches. Stores one position
|
13
|
+
per bucket, bucket key is computed over a long region. */
|
14
|
+
|
15
|
+
#define HashRolling HASHER()
|
16
|
+
|
17
|
+
static const uint32_t FN(kRollingHashMul32) = 69069;
|
18
|
+
static const uint32_t FN(kInvalidPos) = 0xffffffff;
|
19
|
+
|
20
|
+
/* This hasher uses a longer forward length, but returning a higher value here
|
21
|
+
will hurt compression by the main hasher when combined with a composite
|
22
|
+
hasher. The hasher tests for forward itself instead. */
|
23
|
+
static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
|
24
|
+
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
|
25
|
+
|
26
|
+
/* Computes a code from a single byte. A lookup table of 256 values could be
|
27
|
+
used, but simply adding 1 works about as good. */
|
28
|
+
static uint32_t FN(HashByte)(uint8_t byte) {
|
29
|
+
return (uint32_t)byte + 1u;
|
30
|
+
}
|
31
|
+
|
32
|
+
static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add,
|
33
|
+
uint32_t factor) {
|
34
|
+
return (uint32_t)(factor * state + FN(HashByte)(add));
|
35
|
+
}
|
36
|
+
|
37
|
+
static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add,
|
38
|
+
uint8_t rem, uint32_t factor,
|
39
|
+
uint32_t factor_remove) {
|
40
|
+
return (uint32_t)(factor * state +
|
41
|
+
FN(HashByte)(add) - factor_remove * FN(HashByte)(rem));
|
42
|
+
}
|
43
|
+
|
44
|
+
typedef struct HashRolling {
|
45
|
+
uint32_t state;
|
46
|
+
uint32_t* table;
|
47
|
+
size_t next_ix;
|
48
|
+
|
49
|
+
uint32_t chunk_len;
|
50
|
+
uint32_t factor;
|
51
|
+
uint32_t factor_remove;
|
52
|
+
} HashRolling;
|
53
|
+
|
54
|
+
static void FN(Initialize)(
|
55
|
+
HasherCommon* common, HashRolling* BROTLI_RESTRICT self,
|
56
|
+
const BrotliEncoderParams* params) {
|
57
|
+
size_t i;
|
58
|
+
self->state = 0;
|
59
|
+
self->next_ix = 0;
|
60
|
+
|
61
|
+
self->factor = FN(kRollingHashMul32);
|
62
|
+
|
63
|
+
/* Compute the factor of the oldest byte to remove: factor**steps modulo
|
64
|
+
0xffffffff (the multiplications rely on 32-bit overflow) */
|
65
|
+
self->factor_remove = 1;
|
66
|
+
for (i = 0; i < CHUNKLEN; i += JUMP) {
|
67
|
+
self->factor_remove *= self->factor;
|
68
|
+
}
|
69
|
+
|
70
|
+
self->table = (uint32_t*)common->extra;
|
71
|
+
for (i = 0; i < NUMBUCKETS; i++) {
|
72
|
+
self->table[i] = FN(kInvalidPos);
|
73
|
+
}
|
74
|
+
|
75
|
+
BROTLI_UNUSED(params);
|
76
|
+
}
|
77
|
+
|
78
|
+
static void FN(Prepare)(HashRolling* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
|
79
|
+
size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
|
80
|
+
size_t i;
|
81
|
+
/* Too small size, cannot use this hasher. */
|
82
|
+
if (input_size < CHUNKLEN) return;
|
83
|
+
self->state = 0;
|
84
|
+
for (i = 0; i < CHUNKLEN; i += JUMP) {
|
85
|
+
self->state = FN(HashRollingFunctionInitial)(
|
86
|
+
self->state, data[i], self->factor);
|
87
|
+
}
|
88
|
+
BROTLI_UNUSED(one_shot);
|
89
|
+
}
|
90
|
+
|
91
|
+
static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
|
92
|
+
const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
|
93
|
+
size_t input_size) {
|
94
|
+
return NUMBUCKETS * sizeof(uint32_t);
|
95
|
+
BROTLI_UNUSED(params);
|
96
|
+
BROTLI_UNUSED(one_shot);
|
97
|
+
BROTLI_UNUSED(input_size);
|
98
|
+
}
|
99
|
+
|
100
|
+
static BROTLI_INLINE void FN(Store)(HashRolling* BROTLI_RESTRICT self,
|
101
|
+
const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
|
102
|
+
BROTLI_UNUSED(self);
|
103
|
+
BROTLI_UNUSED(data);
|
104
|
+
BROTLI_UNUSED(mask);
|
105
|
+
BROTLI_UNUSED(ix);
|
106
|
+
}
|
107
|
+
|
108
|
+
static BROTLI_INLINE void FN(StoreRange)(HashRolling* BROTLI_RESTRICT self,
|
109
|
+
const uint8_t* BROTLI_RESTRICT data, const size_t mask,
|
110
|
+
const size_t ix_start, const size_t ix_end) {
|
111
|
+
BROTLI_UNUSED(self);
|
112
|
+
BROTLI_UNUSED(data);
|
113
|
+
BROTLI_UNUSED(mask);
|
114
|
+
BROTLI_UNUSED(ix_start);
|
115
|
+
BROTLI_UNUSED(ix_end);
|
116
|
+
}
|
117
|
+
|
118
|
+
static BROTLI_INLINE void FN(StitchToPreviousBlock)(
|
119
|
+
HashRolling* BROTLI_RESTRICT self,
|
120
|
+
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
|
121
|
+
size_t ring_buffer_mask) {
|
122
|
+
/* In this case we must re-initialize the hasher from scratch from the
|
123
|
+
current position. */
|
124
|
+
size_t position_masked;
|
125
|
+
size_t available = num_bytes;
|
126
|
+
if ((position & (JUMP - 1)) != 0) {
|
127
|
+
size_t diff = JUMP - (position & (JUMP - 1));
|
128
|
+
available = (diff > available) ? 0 : (available - diff);
|
129
|
+
position += diff;
|
130
|
+
}
|
131
|
+
position_masked = position & ring_buffer_mask;
|
132
|
+
/* wrapping around ringbuffer not handled. */
|
133
|
+
if (available > ring_buffer_mask - position_masked) {
|
134
|
+
available = ring_buffer_mask - position_masked;
|
135
|
+
}
|
136
|
+
|
137
|
+
FN(Prepare)(self, BROTLI_FALSE, available,
|
138
|
+
ringbuffer + (position & ring_buffer_mask));
|
139
|
+
self->next_ix = position;
|
140
|
+
BROTLI_UNUSED(num_bytes);
|
141
|
+
}
|
142
|
+
|
143
|
+
static BROTLI_INLINE void FN(PrepareDistanceCache)(
|
144
|
+
HashRolling* BROTLI_RESTRICT self,
|
145
|
+
int* BROTLI_RESTRICT distance_cache) {
|
146
|
+
BROTLI_UNUSED(self);
|
147
|
+
BROTLI_UNUSED(distance_cache);
|
148
|
+
}
|
149
|
+
|
150
|
+
static BROTLI_INLINE void FN(FindLongestMatch)(
|
151
|
+
HashRolling* BROTLI_RESTRICT self,
|
152
|
+
const BrotliEncoderDictionary* dictionary,
|
153
|
+
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
|
154
|
+
const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
|
155
|
+
const size_t max_length, const size_t max_backward,
|
156
|
+
const size_t dictionary_distance, const size_t max_distance,
|
157
|
+
HasherSearchResult* BROTLI_RESTRICT out) {
|
158
|
+
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
159
|
+
size_t pos;
|
160
|
+
|
161
|
+
if ((cur_ix & (JUMP - 1)) != 0) return;
|
162
|
+
|
163
|
+
/* Not enough lookahead */
|
164
|
+
if (max_length < CHUNKLEN) return;
|
165
|
+
|
166
|
+
for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) {
|
167
|
+
uint32_t code = self->state & MASK;
|
168
|
+
|
169
|
+
uint8_t rem = data[pos & ring_buffer_mask];
|
170
|
+
uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask];
|
171
|
+
size_t found_ix = FN(kInvalidPos);
|
172
|
+
|
173
|
+
self->state = FN(HashRollingFunction)(
|
174
|
+
self->state, add, rem, self->factor, self->factor_remove);
|
175
|
+
|
176
|
+
if (code < NUMBUCKETS) {
|
177
|
+
found_ix = self->table[code];
|
178
|
+
self->table[code] = (uint32_t)pos;
|
179
|
+
if (pos == cur_ix && found_ix != FN(kInvalidPos)) {
|
180
|
+
/* The cast to 32-bit makes backward distances up to 4GB work even
|
181
|
+
if cur_ix is above 4GB, despite using 32-bit values in the table. */
|
182
|
+
size_t backward = (uint32_t)(cur_ix - found_ix);
|
183
|
+
if (backward <= max_backward) {
|
184
|
+
const size_t found_ix_masked = found_ix & ring_buffer_mask;
|
185
|
+
const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked],
|
186
|
+
&data[cur_ix_masked],
|
187
|
+
max_length);
|
188
|
+
if (len >= 4 && len > out->len) {
|
189
|
+
score_t score = BackwardReferenceScore(len, backward);
|
190
|
+
if (score > out->score) {
|
191
|
+
out->len = len;
|
192
|
+
out->distance = backward;
|
193
|
+
out->score = score;
|
194
|
+
out->len_code_delta = 0;
|
195
|
+
}
|
196
|
+
}
|
197
|
+
}
|
198
|
+
}
|
199
|
+
}
|
200
|
+
}
|
201
|
+
|
202
|
+
self->next_ix = cur_ix + JUMP;
|
203
|
+
|
204
|
+
/* NOTE: this hasher does not search in the dictionary. It is used as
|
205
|
+
backup-hasher, the main hasher already searches in it. */
|
206
|
+
BROTLI_UNUSED(dictionary);
|
207
|
+
BROTLI_UNUSED(distance_cache);
|
208
|
+
BROTLI_UNUSED(dictionary_distance);
|
209
|
+
BROTLI_UNUSED(max_distance);
|
210
|
+
}
|
211
|
+
|
212
|
+
#undef HashRolling
|