brotli 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitmodules +1 -1
- data/.travis.yml +2 -1
- data/README.md +1 -1
- data/Rakefile +1 -1
- data/ext/brotli/brotli.cc +1 -1
- data/ext/brotli/extconf.rb +72 -14
- data/lib/brotli/version.rb +1 -1
- data/vendor/brotli/LICENSE +19 -202
- data/vendor/brotli/dec/Makefile +1 -1
- data/vendor/brotli/dec/bit_reader.c +23 -30
- data/vendor/brotli/dec/bit_reader.h +270 -141
- data/vendor/brotli/dec/context.h +3 -12
- data/vendor/brotli/dec/decode.c +1813 -1048
- data/vendor/brotli/dec/decode.h +22 -16
- data/vendor/brotli/dec/dictionary.c +9466 -0
- data/vendor/brotli/dec/dictionary.h +6 -9461
- data/vendor/brotli/dec/huffman.c +104 -71
- data/vendor/brotli/dec/huffman.h +19 -28
- data/vendor/brotli/dec/port.h +124 -32
- data/vendor/brotli/dec/prefix.h +4 -13
- data/vendor/brotli/dec/state.c +93 -56
- data/vendor/brotli/dec/state.h +124 -53
- data/vendor/brotli/dec/streams.c +14 -11
- data/vendor/brotli/dec/streams.h +6 -11
- data/vendor/brotli/dec/transform.h +2 -11
- data/vendor/brotli/dec/types.h +21 -19
- data/vendor/brotli/enc/Makefile +4 -1
- data/vendor/brotli/enc/backward_references.cc +87 -94
- data/vendor/brotli/enc/backward_references.h +8 -18
- data/vendor/brotli/enc/bit_cost.h +11 -19
- data/vendor/brotli/enc/block_splitter.cc +43 -48
- data/vendor/brotli/enc/block_splitter.h +7 -16
- data/vendor/brotli/enc/brotli_bit_stream.cc +48 -50
- data/vendor/brotli/enc/brotli_bit_stream.h +7 -16
- data/vendor/brotli/enc/cluster.h +24 -25
- data/vendor/brotli/enc/command.h +34 -41
- data/vendor/brotli/enc/context.h +11 -18
- data/vendor/brotli/enc/dictionary.cc +9466 -0
- data/vendor/brotli/enc/dictionary.h +20 -9464
- data/vendor/brotli/enc/dictionary_hash.h +7 -15
- data/vendor/brotli/enc/encode.cc +80 -148
- data/vendor/brotli/enc/encode.h +19 -29
- data/vendor/brotli/enc/encode_parallel.cc +35 -108
- data/vendor/brotli/enc/encode_parallel.h +7 -16
- data/vendor/brotli/enc/entropy_encode.cc +33 -42
- data/vendor/brotli/enc/entropy_encode.h +8 -16
- data/vendor/brotli/enc/fast_log.h +8 -15
- data/vendor/brotli/enc/find_match_length.h +7 -17
- data/vendor/brotli/enc/hash.h +130 -150
- data/vendor/brotli/enc/histogram.cc +7 -16
- data/vendor/brotli/enc/histogram.h +11 -17
- data/vendor/brotli/enc/literal_cost.cc +28 -35
- data/vendor/brotli/enc/literal_cost.h +9 -23
- data/vendor/brotli/enc/metablock.cc +18 -26
- data/vendor/brotli/enc/metablock.h +6 -14
- data/vendor/brotli/enc/port.h +14 -14
- data/vendor/brotli/enc/prefix.h +11 -18
- data/vendor/brotli/enc/ringbuffer.h +18 -27
- data/vendor/brotli/enc/static_dict.cc +7 -1
- data/vendor/brotli/enc/static_dict.h +7 -15
- data/vendor/brotli/enc/static_dict_lut.h +7 -15
- data/vendor/brotli/enc/streams.cc +15 -28
- data/vendor/brotli/enc/streams.h +27 -35
- data/vendor/brotli/enc/transform.h +9 -16
- data/vendor/brotli/enc/types.h +27 -0
- data/vendor/brotli/enc/utf8_util.cc +82 -0
- data/vendor/brotli/enc/utf8_util.h +25 -0
- data/vendor/brotli/enc/write_bits.h +11 -18
- metadata +7 -2
data/vendor/brotli/enc/hash.h
CHANGED
@@ -1,25 +1,15 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
1
|
+
/* Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
15
7
|
// A (forgetful) hash table to the data seen by the compressor, to
|
16
8
|
// help create backward references to previous data.
|
17
9
|
|
18
10
|
#ifndef BROTLI_ENC_HASH_H_
|
19
11
|
#define BROTLI_ENC_HASH_H_
|
20
12
|
|
21
|
-
#include <stddef.h>
|
22
|
-
#include <stdint.h>
|
23
13
|
#include <string.h>
|
24
14
|
#include <sys/types.h>
|
25
15
|
#include <algorithm>
|
@@ -34,6 +24,7 @@
|
|
34
24
|
#include "./prefix.h"
|
35
25
|
#include "./static_dict.h"
|
36
26
|
#include "./transform.h"
|
27
|
+
#include "./types.h"
|
37
28
|
|
38
29
|
namespace brotli {
|
39
30
|
|
@@ -137,20 +128,13 @@ class HashLongestMatchQuickly {
|
|
137
128
|
// Look at 4 bytes at data.
|
138
129
|
// Compute a hash from these, and store the value somewhere within
|
139
130
|
// [ix .. ix+3].
|
140
|
-
inline void Store(const uint8_t *data, const
|
131
|
+
inline void Store(const uint8_t *data, const uint32_t ix) {
|
141
132
|
const uint32_t key = HashBytes(data);
|
142
133
|
// Wiggle the value with the bucket sweep range.
|
143
|
-
const uint32_t off = (
|
134
|
+
const uint32_t off = (ix >> 3) % kBucketSweep;
|
144
135
|
buckets_[key + off] = ix;
|
145
136
|
}
|
146
137
|
|
147
|
-
// Store hashes for a range of data.
|
148
|
-
void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
|
149
|
-
for (int p = 0; p < len; ++p) {
|
150
|
-
Store(&data[p & mask], startix + p);
|
151
|
-
}
|
152
|
-
}
|
153
|
-
|
154
138
|
// Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
|
155
139
|
// up to the length of max_length.
|
156
140
|
//
|
@@ -163,22 +147,22 @@ class HashLongestMatchQuickly {
|
|
163
147
|
const size_t ring_buffer_mask,
|
164
148
|
const int* __restrict distance_cache,
|
165
149
|
const uint32_t cur_ix,
|
166
|
-
const
|
150
|
+
const int max_length,
|
167
151
|
const uint32_t max_backward,
|
168
152
|
int * __restrict best_len_out,
|
169
153
|
int * __restrict best_len_code_out,
|
170
154
|
int * __restrict best_distance_out,
|
171
155
|
double* __restrict best_score_out) {
|
172
156
|
const int best_len_in = *best_len_out;
|
173
|
-
const
|
157
|
+
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
174
158
|
int compare_char = ring_buffer[cur_ix_masked + best_len_in];
|
175
159
|
double best_score = *best_score_out;
|
176
160
|
int best_len = best_len_in;
|
177
|
-
int
|
178
|
-
|
161
|
+
int cached_backward = distance_cache[0];
|
162
|
+
uint32_t prev_ix = cur_ix - cached_backward;
|
179
163
|
bool match_found = false;
|
180
164
|
if (prev_ix < cur_ix) {
|
181
|
-
prev_ix &= ring_buffer_mask;
|
165
|
+
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
|
182
166
|
if (compare_char == ring_buffer[prev_ix + best_len]) {
|
183
167
|
int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
|
184
168
|
&ring_buffer[cur_ix_masked],
|
@@ -188,7 +172,7 @@ class HashLongestMatchQuickly {
|
|
188
172
|
best_len = len;
|
189
173
|
*best_len_out = len;
|
190
174
|
*best_len_code_out = len;
|
191
|
-
*best_distance_out =
|
175
|
+
*best_distance_out = cached_backward;
|
192
176
|
*best_score_out = best_score;
|
193
177
|
compare_char = ring_buffer[cur_ix_masked + best_len];
|
194
178
|
if (kBucketSweep == 1) {
|
@@ -203,8 +187,8 @@ class HashLongestMatchQuickly {
|
|
203
187
|
if (kBucketSweep == 1) {
|
204
188
|
// Only one to look for, don't bother to prepare for a loop.
|
205
189
|
prev_ix = buckets_[key];
|
206
|
-
backward = cur_ix - prev_ix;
|
207
|
-
prev_ix &= ring_buffer_mask;
|
190
|
+
uint32_t backward = cur_ix - prev_ix;
|
191
|
+
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
|
208
192
|
if (compare_char != ring_buffer[prev_ix + best_len_in]) {
|
209
193
|
return false;
|
210
194
|
}
|
@@ -225,8 +209,8 @@ class HashLongestMatchQuickly {
|
|
225
209
|
uint32_t *bucket = buckets_ + key;
|
226
210
|
prev_ix = *bucket++;
|
227
211
|
for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {
|
228
|
-
const
|
229
|
-
prev_ix &= ring_buffer_mask;
|
212
|
+
const uint32_t backward = cur_ix - prev_ix;
|
213
|
+
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
|
230
214
|
if (compare_char != ring_buffer[prev_ix + best_len]) {
|
231
215
|
continue;
|
232
216
|
}
|
@@ -255,8 +239,8 @@ class HashLongestMatchQuickly {
|
|
255
239
|
if (kUseDictionary && !match_found &&
|
256
240
|
num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
|
257
241
|
++num_dict_lookups_;
|
258
|
-
const uint32_t
|
259
|
-
const uint16_t v = kStaticDictionaryHash[
|
242
|
+
const uint32_t dict_key = Hash<14>(&ring_buffer[cur_ix_masked]) << 1;
|
243
|
+
const uint16_t v = kStaticDictionaryHash[dict_key];
|
260
244
|
if (v > 0) {
|
261
245
|
const int len = v & 31;
|
262
246
|
const int dist = v >> 5;
|
@@ -270,7 +254,7 @@ class HashLongestMatchQuickly {
|
|
270
254
|
const int word_id =
|
271
255
|
transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
|
272
256
|
dist;
|
273
|
-
const
|
257
|
+
const int backward = max_backward + word_id + 1;
|
274
258
|
const double score = BackwardReferenceScore(matchlen, backward);
|
275
259
|
if (best_score < score) {
|
276
260
|
++num_dict_matches_;
|
@@ -297,11 +281,10 @@ class HashLongestMatchQuickly {
|
|
297
281
|
static uint32_t HashBytes(const uint8_t *data) {
|
298
282
|
// Computing a hash based on 5 bytes works much better for
|
299
283
|
// qualities 1 and 3, where the next hash value is likely to replace
|
300
|
-
static const uint32_t kHashMul32 = 0x1e35a7bd;
|
301
284
|
uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32;
|
302
285
|
// The higher bits contain more mixture from the multiplication,
|
303
286
|
// so we take our results from there.
|
304
|
-
return h >> (64 - kBucketBits);
|
287
|
+
return static_cast<uint32_t>(h >> (64 - kBucketBits));
|
305
288
|
}
|
306
289
|
|
307
290
|
private:
|
@@ -337,20 +320,13 @@ class HashLongestMatch {
|
|
337
320
|
|
338
321
|
// Look at 3 bytes at data.
|
339
322
|
// Compute a hash from these, and store the value of ix at that position.
|
340
|
-
inline void Store(const uint8_t *data, const
|
323
|
+
inline void Store(const uint8_t *data, const uint32_t ix) {
|
341
324
|
const uint32_t key = HashBytes(data);
|
342
325
|
const int minor_ix = num_[key] & kBlockMask;
|
343
326
|
buckets_[key][minor_ix] = ix;
|
344
327
|
++num_[key];
|
345
328
|
}
|
346
329
|
|
347
|
-
// Store hashes for a range of data.
|
348
|
-
void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
|
349
|
-
for (int p = 0; p < len; ++p) {
|
350
|
-
Store(&data[p & mask], startix + p);
|
351
|
-
}
|
352
|
-
}
|
353
|
-
|
354
330
|
// Find a longest backward match of &data[cur_ix] up to the length of
|
355
331
|
// max_length.
|
356
332
|
//
|
@@ -364,7 +340,7 @@ class HashLongestMatch {
|
|
364
340
|
const size_t ring_buffer_mask,
|
365
341
|
const int* __restrict distance_cache,
|
366
342
|
const uint32_t cur_ix,
|
367
|
-
|
343
|
+
const int max_length,
|
368
344
|
const uint32_t max_backward,
|
369
345
|
int * __restrict best_len_out,
|
370
346
|
int * __restrict best_len_code_out,
|
@@ -381,21 +357,21 @@ class HashLongestMatch {
|
|
381
357
|
for (int i = 0; i < kNumLastDistancesToCheck; ++i) {
|
382
358
|
const int idx = kDistanceCacheIndex[i];
|
383
359
|
const int backward = distance_cache[idx] + kDistanceCacheOffset[i];
|
384
|
-
|
360
|
+
uint32_t prev_ix = cur_ix - backward;
|
385
361
|
if (prev_ix >= cur_ix) {
|
386
362
|
continue;
|
387
363
|
}
|
388
|
-
if (PREDICT_FALSE(backward > max_backward)) {
|
364
|
+
if (PREDICT_FALSE(backward > (int)max_backward)) {
|
389
365
|
continue;
|
390
366
|
}
|
391
|
-
prev_ix &= ring_buffer_mask;
|
367
|
+
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
|
392
368
|
|
393
369
|
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
394
370
|
prev_ix + best_len > ring_buffer_mask ||
|
395
371
|
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
396
372
|
continue;
|
397
373
|
}
|
398
|
-
const
|
374
|
+
const int len =
|
399
375
|
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
400
376
|
max_length);
|
401
377
|
if (len >= 3 || (len == 2 && i < 2)) {
|
@@ -415,46 +391,44 @@ class HashLongestMatch {
|
|
415
391
|
}
|
416
392
|
}
|
417
393
|
const uint32_t key = HashBytes(&data[cur_ix_masked]);
|
418
|
-
const
|
394
|
+
const uint32_t * __restrict const bucket = &buckets_[key][0];
|
419
395
|
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
|
420
396
|
for (int i = num_[key] - 1; i >= down; --i) {
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
match_found = true;
|
449
|
-
}
|
397
|
+
uint32_t prev_ix = bucket[i & kBlockMask];
|
398
|
+
const uint32_t backward = cur_ix - prev_ix;
|
399
|
+
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
400
|
+
break;
|
401
|
+
}
|
402
|
+
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
|
403
|
+
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
404
|
+
prev_ix + best_len > ring_buffer_mask ||
|
405
|
+
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
406
|
+
continue;
|
407
|
+
}
|
408
|
+
const int len =
|
409
|
+
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
410
|
+
max_length);
|
411
|
+
if (len >= 4) {
|
412
|
+
// Comparing for >= 3 does not change the semantics, but just saves
|
413
|
+
// for a few unnecessary binary logarithms in backward reference
|
414
|
+
// score, since we are not interested in such short matches.
|
415
|
+
double score = BackwardReferenceScore(len, backward);
|
416
|
+
if (best_score < score) {
|
417
|
+
best_score = score;
|
418
|
+
best_len = len;
|
419
|
+
*best_len_out = best_len;
|
420
|
+
*best_len_code_out = best_len;
|
421
|
+
*best_distance_out = backward;
|
422
|
+
*best_score_out = best_score;
|
423
|
+
match_found = true;
|
450
424
|
}
|
451
425
|
}
|
452
426
|
}
|
453
427
|
if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
|
454
|
-
uint32_t
|
455
|
-
for (int k = 0; k < 2; ++k, ++
|
428
|
+
uint32_t dict_key = Hash<14>(&data[cur_ix_masked]) << 1;
|
429
|
+
for (int k = 0; k < 2; ++k, ++dict_key) {
|
456
430
|
++num_dict_lookups_;
|
457
|
-
const uint16_t v = kStaticDictionaryHash[
|
431
|
+
const uint16_t v = kStaticDictionaryHash[dict_key];
|
458
432
|
if (v > 0) {
|
459
433
|
const int len = v & 31;
|
460
434
|
const int dist = v >> 5;
|
@@ -468,7 +442,7 @@ class HashLongestMatch {
|
|
468
442
|
const int word_id =
|
469
443
|
transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
|
470
444
|
dist;
|
471
|
-
const
|
445
|
+
const int backward = max_backward + word_id + 1;
|
472
446
|
double score = BackwardReferenceScore(matchlen, backward);
|
473
447
|
if (best_score < score) {
|
474
448
|
++num_dict_matches_;
|
@@ -500,7 +474,7 @@ class HashLongestMatch {
|
|
500
474
|
void FindAllMatches(const uint8_t* data,
|
501
475
|
const size_t ring_buffer_mask,
|
502
476
|
const uint32_t cur_ix,
|
503
|
-
|
477
|
+
const int max_length,
|
504
478
|
const uint32_t max_backward,
|
505
479
|
int* num_matches,
|
506
480
|
BackwardMatch* matches) const {
|
@@ -520,7 +494,7 @@ class HashLongestMatch {
|
|
520
494
|
data[cur_ix_masked + 1] != data[prev_ix + 1]) {
|
521
495
|
continue;
|
522
496
|
}
|
523
|
-
const
|
497
|
+
const int len =
|
524
498
|
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
525
499
|
max_length);
|
526
500
|
if (len > best_len) {
|
@@ -528,35 +502,33 @@ class HashLongestMatch {
|
|
528
502
|
if (len > kMaxZopfliLen) {
|
529
503
|
matches = orig_matches;
|
530
504
|
}
|
531
|
-
*matches++ = BackwardMatch(backward, len);
|
505
|
+
*matches++ = BackwardMatch(static_cast<int>(backward), len);
|
532
506
|
}
|
533
507
|
}
|
534
508
|
const uint32_t key = HashBytes(&data[cur_ix_masked]);
|
535
|
-
const
|
509
|
+
const uint32_t * __restrict const bucket = &buckets_[key][0];
|
536
510
|
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
|
537
511
|
for (int i = num_[key] - 1; i >= down; --i) {
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
matches = orig_matches;
|
557
|
-
}
|
558
|
-
*matches++ = BackwardMatch(backward, len);
|
512
|
+
uint32_t prev_ix = bucket[i & kBlockMask];
|
513
|
+
const uint32_t backward = cur_ix - prev_ix;
|
514
|
+
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
515
|
+
break;
|
516
|
+
}
|
517
|
+
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
|
518
|
+
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
519
|
+
prev_ix + best_len > ring_buffer_mask ||
|
520
|
+
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
521
|
+
continue;
|
522
|
+
}
|
523
|
+
const int len =
|
524
|
+
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
525
|
+
max_length);
|
526
|
+
if (len > best_len) {
|
527
|
+
best_len = len;
|
528
|
+
if (len > kMaxZopfliLen) {
|
529
|
+
matches = orig_matches;
|
559
530
|
}
|
531
|
+
*matches++ = BackwardMatch(backward, len);
|
560
532
|
}
|
561
533
|
}
|
562
534
|
std::vector<int> dict_matches(kMaxDictionaryMatchLen + 1, kInvalidMatch);
|
@@ -572,7 +544,7 @@ class HashLongestMatch {
|
|
572
544
|
}
|
573
545
|
}
|
574
546
|
}
|
575
|
-
*num_matches += matches - orig_matches;
|
547
|
+
*num_matches += static_cast<int>(matches - orig_matches);
|
576
548
|
}
|
577
549
|
|
578
550
|
enum { kHashLength = 4 };
|
@@ -582,14 +554,6 @@ class HashLongestMatch {
|
|
582
554
|
// the address in. The HashLongestMatch and HashLongestMatchQuickly
|
583
555
|
// classes have separate, different implementations of hashing.
|
584
556
|
static uint32_t HashBytes(const uint8_t *data) {
|
585
|
-
// kHashMul32 multiplier has these properties:
|
586
|
-
// * The multiplier must be odd. Otherwise we may lose the highest bit.
|
587
|
-
// * No long streaks of 1s or 0s.
|
588
|
-
// * Is not unfortunate (see the unittest) for the English language.
|
589
|
-
// * There is no effort to ensure that it is a prime, the oddity is enough
|
590
|
-
// for this use.
|
591
|
-
// * The number has been tuned heuristically against compression benchmarks.
|
592
|
-
static const uint32_t kHashMul32 = 0x1e35a7bd;
|
593
557
|
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
|
594
558
|
// The higher bits contain more mixture from the multiplication,
|
595
559
|
// so we take our results from there.
|
@@ -611,7 +575,7 @@ class HashLongestMatch {
|
|
611
575
|
uint16_t num_[kBucketSize];
|
612
576
|
|
613
577
|
// Buckets containing kBlockSize of backward references.
|
614
|
-
|
578
|
+
uint32_t buckets_[kBucketSize][kBlockSize];
|
615
579
|
|
616
580
|
size_t num_dict_lookups_;
|
617
581
|
size_t num_dict_matches_;
|
@@ -631,17 +595,32 @@ struct Hashers {
|
|
631
595
|
typedef HashLongestMatch<15, 7, 10> H8;
|
632
596
|
typedef HashLongestMatch<15, 8, 16> H9;
|
633
597
|
|
598
|
+
Hashers() : hash_h1(0), hash_h2(0), hash_h3(0), hash_h4(0), hash_h5(0),
|
599
|
+
hash_h6(0), hash_h7(0), hash_h8(0), hash_h9(0) {}
|
600
|
+
|
601
|
+
~Hashers() {
|
602
|
+
delete hash_h1;
|
603
|
+
delete hash_h2;
|
604
|
+
delete hash_h3;
|
605
|
+
delete hash_h4;
|
606
|
+
delete hash_h5;
|
607
|
+
delete hash_h6;
|
608
|
+
delete hash_h7;
|
609
|
+
delete hash_h8;
|
610
|
+
delete hash_h9;
|
611
|
+
}
|
612
|
+
|
634
613
|
void Init(int type) {
|
635
614
|
switch (type) {
|
636
|
-
case 1: hash_h1
|
637
|
-
case 2: hash_h2
|
638
|
-
case 3: hash_h3
|
639
|
-
case 4: hash_h4
|
640
|
-
case 5: hash_h5
|
641
|
-
case 6: hash_h6
|
642
|
-
case 7: hash_h7
|
643
|
-
case 8: hash_h8
|
644
|
-
case 9: hash_h9
|
615
|
+
case 1: hash_h1 = new H1; break;
|
616
|
+
case 2: hash_h2 = new H2; break;
|
617
|
+
case 3: hash_h3 = new H3; break;
|
618
|
+
case 4: hash_h4 = new H4; break;
|
619
|
+
case 5: hash_h5 = new H5; break;
|
620
|
+
case 6: hash_h6 = new H6; break;
|
621
|
+
case 7: hash_h7 = new H7; break;
|
622
|
+
case 8: hash_h8 = new H8; break;
|
623
|
+
case 9: hash_h9 = new H9; break;
|
645
624
|
default: break;
|
646
625
|
}
|
647
626
|
}
|
@@ -649,7 +628,7 @@ struct Hashers {
|
|
649
628
|
template<typename Hasher>
|
650
629
|
void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {
|
651
630
|
for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) {
|
652
|
-
hasher->Store(dict, i);
|
631
|
+
hasher->Store(&dict[i], static_cast<uint32_t>(i));
|
653
632
|
}
|
654
633
|
}
|
655
634
|
|
@@ -657,28 +636,29 @@ struct Hashers {
|
|
657
636
|
void PrependCustomDictionary(
|
658
637
|
int type, const size_t size, const uint8_t* dict) {
|
659
638
|
switch (type) {
|
660
|
-
case 1: WarmupHash(size, dict, hash_h1
|
661
|
-
case 2: WarmupHash(size, dict, hash_h2
|
662
|
-
case 3: WarmupHash(size, dict, hash_h3
|
663
|
-
case 4: WarmupHash(size, dict, hash_h4
|
664
|
-
case 5: WarmupHash(size, dict, hash_h5
|
665
|
-
case 6: WarmupHash(size, dict, hash_h6
|
666
|
-
case 7: WarmupHash(size, dict, hash_h7
|
667
|
-
case 8: WarmupHash(size, dict, hash_h8
|
668
|
-
case 9: WarmupHash(size, dict, hash_h9
|
639
|
+
case 1: WarmupHash(size, dict, hash_h1); break;
|
640
|
+
case 2: WarmupHash(size, dict, hash_h2); break;
|
641
|
+
case 3: WarmupHash(size, dict, hash_h3); break;
|
642
|
+
case 4: WarmupHash(size, dict, hash_h4); break;
|
643
|
+
case 5: WarmupHash(size, dict, hash_h5); break;
|
644
|
+
case 6: WarmupHash(size, dict, hash_h6); break;
|
645
|
+
case 7: WarmupHash(size, dict, hash_h7); break;
|
646
|
+
case 8: WarmupHash(size, dict, hash_h8); break;
|
647
|
+
case 9: WarmupHash(size, dict, hash_h9); break;
|
669
648
|
default: break;
|
670
649
|
}
|
671
650
|
}
|
672
651
|
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
652
|
+
|
653
|
+
H1* hash_h1;
|
654
|
+
H2* hash_h2;
|
655
|
+
H3* hash_h3;
|
656
|
+
H4* hash_h4;
|
657
|
+
H5* hash_h5;
|
658
|
+
H6* hash_h6;
|
659
|
+
H7* hash_h7;
|
660
|
+
H8* hash_h8;
|
661
|
+
H9* hash_h9;
|
682
662
|
};
|
683
663
|
|
684
664
|
} // namespace brotli
|
@@ -1,22 +1,13 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
15
7
|
// Build per-context histograms of literals, commands and distance codes.
|
16
8
|
|
17
9
|
#include "./histogram.h"
|
18
10
|
|
19
|
-
#include <stdint.h>
|
20
11
|
#include <cmath>
|
21
12
|
|
22
13
|
#include "./block_splitter.h"
|
@@ -45,7 +36,7 @@ void BuildHistograms(
|
|
45
36
|
BlockSplitIterator literal_it(literal_split);
|
46
37
|
BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
|
47
38
|
BlockSplitIterator dist_it(dist_split);
|
48
|
-
for (
|
39
|
+
for (size_t i = 0; i < num_commands; ++i) {
|
49
40
|
const Command &cmd = cmds[i];
|
50
41
|
insert_and_copy_it.Next();
|
51
42
|
(*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
|
@@ -1,33 +1,26 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
15
7
|
// Models the histograms of literals, commands and distance codes.
|
16
8
|
|
17
9
|
#ifndef BROTLI_ENC_HISTOGRAM_H_
|
18
10
|
#define BROTLI_ENC_HISTOGRAM_H_
|
19
11
|
|
20
|
-
#include <stdint.h>
|
21
12
|
#include <string.h>
|
13
|
+
#include <limits>
|
22
14
|
#include <vector>
|
23
15
|
#include <utility>
|
24
16
|
#include "./command.h"
|
25
17
|
#include "./fast_log.h"
|
26
18
|
#include "./prefix.h"
|
19
|
+
#include "./types.h"
|
27
20
|
|
28
21
|
namespace brotli {
|
29
22
|
|
30
|
-
|
23
|
+
struct BlockSplit;
|
31
24
|
|
32
25
|
// A simple container for histograms of data in blocks.
|
33
26
|
template<int kDataSize>
|
@@ -38,6 +31,7 @@ struct Histogram {
|
|
38
31
|
void Clear() {
|
39
32
|
memset(data_, 0, sizeof(data_));
|
40
33
|
total_count_ = 0;
|
34
|
+
bit_cost_ = std::numeric_limits<double>::infinity();
|
41
35
|
}
|
42
36
|
void Add(int val) {
|
43
37
|
++data_[val];
|
@@ -49,7 +43,7 @@ struct Histogram {
|
|
49
43
|
}
|
50
44
|
template<typename DataType>
|
51
45
|
void Add(const DataType *p, size_t n) {
|
52
|
-
total_count_ += n;
|
46
|
+
total_count_ += static_cast<int>(n);
|
53
47
|
n += 1;
|
54
48
|
while(--n) ++data_[*p++];
|
55
49
|
}
|