brotli 0.1.8 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.travis.yml +7 -3
- data/brotli.gemspec +1 -1
- data/ext/brotli/brotli.c +4 -4
- data/ext/brotli/brotli.h +2 -2
- data/ext/brotli/extconf.rb +9 -16
- data/lib/brotli/version.rb +1 -1
- data/vendor/brotli/{common → c/common}/constants.h +11 -1
- data/vendor/brotli/c/common/dictionary.bin +432 -0
- data/vendor/brotli/c/common/dictionary.c +5905 -0
- data/vendor/brotli/c/common/dictionary.h +64 -0
- data/vendor/brotli/c/common/version.h +19 -0
- data/vendor/brotli/{dec → c/dec}/bit_reader.c +2 -2
- data/vendor/brotli/{dec → c/dec}/bit_reader.h +11 -34
- data/vendor/brotli/{dec → c/dec}/context.h +1 -1
- data/vendor/brotli/{dec → c/dec}/decode.c +389 -356
- data/vendor/brotli/{dec → c/dec}/huffman.c +24 -23
- data/vendor/brotli/{dec → c/dec}/huffman.h +1 -1
- data/vendor/brotli/{dec → c/dec}/port.h +19 -10
- data/vendor/brotli/{dec → c/dec}/prefix.h +1 -1
- data/vendor/brotli/{dec → c/dec}/state.c +23 -19
- data/vendor/brotli/{dec → c/dec}/state.h +18 -17
- data/vendor/brotli/{dec → c/dec}/transform.h +2 -2
- data/vendor/brotli/c/enc/backward_references.c +134 -0
- data/vendor/brotli/c/enc/backward_references.h +39 -0
- data/vendor/brotli/{enc/backward_references.c → c/enc/backward_references_hq.c} +144 -232
- data/vendor/brotli/{enc/backward_references.h → c/enc/backward_references_hq.h} +28 -31
- data/vendor/brotli/{enc → c/enc}/backward_references_inc.h +37 -31
- data/vendor/brotli/{enc → c/enc}/bit_cost.c +1 -1
- data/vendor/brotli/{enc → c/enc}/bit_cost.h +1 -1
- data/vendor/brotli/{enc → c/enc}/bit_cost_inc.h +0 -0
- data/vendor/brotli/{enc → c/enc}/block_encoder_inc.h +0 -0
- data/vendor/brotli/{enc → c/enc}/block_splitter.c +2 -4
- data/vendor/brotli/{enc → c/enc}/block_splitter.h +1 -1
- data/vendor/brotli/{enc → c/enc}/block_splitter_inc.h +6 -7
- data/vendor/brotli/{enc → c/enc}/brotli_bit_stream.c +22 -26
- data/vendor/brotli/{enc → c/enc}/brotli_bit_stream.h +1 -5
- data/vendor/brotli/{enc → c/enc}/cluster.c +1 -1
- data/vendor/brotli/{enc → c/enc}/cluster.h +1 -1
- data/vendor/brotli/{enc → c/enc}/cluster_inc.h +2 -0
- data/vendor/brotli/{enc → c/enc}/command.h +34 -17
- data/vendor/brotli/{enc → c/enc}/compress_fragment.c +97 -53
- data/vendor/brotli/{enc → c/enc}/compress_fragment.h +5 -2
- data/vendor/brotli/{enc → c/enc}/compress_fragment_two_pass.c +106 -51
- data/vendor/brotli/{enc → c/enc}/compress_fragment_two_pass.h +5 -2
- data/vendor/brotli/{enc → c/enc}/context.h +3 -3
- data/vendor/brotli/c/enc/dictionary_hash.c +1120 -0
- data/vendor/brotli/c/enc/dictionary_hash.h +24 -0
- data/vendor/brotli/{enc → c/enc}/encode.c +442 -240
- data/vendor/brotli/{enc → c/enc}/entropy_encode.c +9 -9
- data/vendor/brotli/{enc → c/enc}/entropy_encode.h +4 -4
- data/vendor/brotli/{enc → c/enc}/entropy_encode_static.h +4 -4
- data/vendor/brotli/{enc → c/enc}/fast_log.h +3 -3
- data/vendor/brotli/{enc → c/enc}/find_match_length.h +8 -8
- data/vendor/brotli/c/enc/hash.h +446 -0
- data/vendor/brotli/{enc → c/enc}/hash_forgetful_chain_inc.h +72 -68
- data/vendor/brotli/c/enc/hash_longest_match64_inc.h +266 -0
- data/vendor/brotli/c/enc/hash_longest_match_inc.h +258 -0
- data/vendor/brotli/{enc → c/enc}/hash_longest_match_quickly_inc.h +81 -77
- data/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +326 -0
- data/vendor/brotli/{enc → c/enc}/histogram.c +4 -2
- data/vendor/brotli/{enc → c/enc}/histogram.h +1 -1
- data/vendor/brotli/{enc → c/enc}/histogram_inc.h +0 -0
- data/vendor/brotli/{enc → c/enc}/literal_cost.c +4 -7
- data/vendor/brotli/{enc → c/enc}/literal_cost.h +2 -2
- data/vendor/brotli/{enc → c/enc}/memory.c +1 -1
- data/vendor/brotli/{enc → c/enc}/memory.h +3 -2
- data/vendor/brotli/{enc → c/enc}/metablock.c +136 -123
- data/vendor/brotli/{enc → c/enc}/metablock.h +2 -12
- data/vendor/brotli/{enc → c/enc}/metablock_inc.h +0 -0
- data/vendor/brotli/{enc → c/enc}/port.h +49 -33
- data/vendor/brotli/{enc → c/enc}/prefix.h +4 -2
- data/vendor/brotli/{enc → c/enc}/quality.h +47 -17
- data/vendor/brotli/{enc → c/enc}/ringbuffer.h +6 -6
- data/vendor/brotli/{enc → c/enc}/static_dict.c +26 -22
- data/vendor/brotli/{enc → c/enc}/static_dict.h +3 -1
- data/vendor/brotli/c/enc/static_dict_lut.h +5864 -0
- data/vendor/brotli/{enc → c/enc}/utf8_util.c +1 -1
- data/vendor/brotli/{enc → c/enc}/utf8_util.h +2 -2
- data/vendor/brotli/{enc → c/enc}/write_bits.h +3 -3
- data/vendor/brotli/c/include/brotli/decode.h +339 -0
- data/vendor/brotli/c/include/brotli/encode.h +402 -0
- data/vendor/brotli/c/include/brotli/port.h +146 -0
- data/vendor/brotli/c/include/brotli/types.h +90 -0
- metadata +80 -79
- data/vendor/brotli/common/dictionary.c +0 -9474
- data/vendor/brotli/common/dictionary.h +0 -29
- data/vendor/brotli/common/port.h +0 -107
- data/vendor/brotli/common/types.h +0 -58
- data/vendor/brotli/dec/decode.h +0 -188
- data/vendor/brotli/enc/compressor.cc +0 -139
- data/vendor/brotli/enc/compressor.h +0 -161
- data/vendor/brotli/enc/dictionary_hash.h +0 -4121
- data/vendor/brotli/enc/encode.h +0 -221
- data/vendor/brotli/enc/encode_parallel.cc +0 -289
- data/vendor/brotli/enc/encode_parallel.h +0 -27
- data/vendor/brotli/enc/hash.h +0 -717
- data/vendor/brotli/enc/hash_longest_match_inc.h +0 -241
- data/vendor/brotli/enc/static_dict_lut.h +0 -11241
- data/vendor/brotli/enc/streams.cc +0 -114
- data/vendor/brotli/enc/streams.h +0 -121
@@ -11,7 +11,7 @@
|
|
11
11
|
#include <string.h> /* memset */
|
12
12
|
|
13
13
|
#include "../common/constants.h"
|
14
|
-
#include
|
14
|
+
#include <brotli/types.h>
|
15
15
|
#include "./port.h"
|
16
16
|
|
17
17
|
#if defined(__cplusplus) || defined(c_plusplus)
|
@@ -246,7 +246,7 @@ void BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
|
|
246
246
|
size_t limit;
|
247
247
|
size_t sum;
|
248
248
|
const size_t streak_limit = 1240;
|
249
|
-
/* Let's make the Huffman code more compatible with
|
249
|
+
/* Let's make the Huffman code more compatible with RLE encoding. */
|
250
250
|
size_t i;
|
251
251
|
for (i = 0; i < length; i++) {
|
252
252
|
if (counts[i]) {
|
@@ -293,10 +293,10 @@ void BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
|
|
293
293
|
}
|
294
294
|
}
|
295
295
|
/* 2) Let's mark all population counts that already can be encoded
|
296
|
-
with an
|
296
|
+
with an RLE code. */
|
297
297
|
memset(good_for_rle, 0, length);
|
298
298
|
{
|
299
|
-
/* Let's not spoil any of the existing good
|
299
|
+
/* Let's not spoil any of the existing good RLE codes.
|
300
300
|
Mark any seq of 0's that is longer than 5 as a good_for_rle.
|
301
301
|
Mark any seq of non-0's that is longer than 7 as a good_for_rle. */
|
302
302
|
uint32_t symbol = counts[0];
|
@@ -319,7 +319,7 @@ void BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
|
|
319
319
|
}
|
320
320
|
}
|
321
321
|
}
|
322
|
-
/* 3) Let's replace those population counts that lead to more
|
322
|
+
/* 3) Let's replace those population counts that lead to more RLE codes.
|
323
323
|
Math here is in 24.8 fixed point representation. */
|
324
324
|
stride = 0;
|
325
325
|
limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
|
@@ -420,15 +420,15 @@ void BrotliWriteHuffmanTree(const uint8_t* depth,
|
|
420
420
|
}
|
421
421
|
}
|
422
422
|
|
423
|
-
/* First gather statistics on if it is a good idea to do
|
423
|
+
/* First gather statistics on if it is a good idea to do RLE. */
|
424
424
|
if (length > 50) {
|
425
|
-
/* Find
|
426
|
-
Shorter codes seem not to benefit from
|
425
|
+
/* Find RLE coding for longer codes.
|
426
|
+
Shorter codes seem not to benefit from RLE. */
|
427
427
|
DecideOverRleUse(depth, new_length,
|
428
428
|
&use_rle_for_non_zero, &use_rle_for_zero);
|
429
429
|
}
|
430
430
|
|
431
|
-
/* Actual
|
431
|
+
/* Actual RLE coding. */
|
432
432
|
for (i = 0; i < new_length;) {
|
433
433
|
const uint8_t value = depth[i];
|
434
434
|
size_t reps = 1;
|
@@ -9,7 +9,7 @@
|
|
9
9
|
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
|
10
10
|
#define BROTLI_ENC_ENTROPY_ENCODE_H_
|
11
11
|
|
12
|
-
#include
|
12
|
+
#include <brotli/types.h>
|
13
13
|
#include "./port.h"
|
14
14
|
|
15
15
|
#if defined(__cplusplus) || defined(c_plusplus)
|
@@ -30,7 +30,7 @@ static BROTLI_INLINE void InitHuffmanTree(HuffmanTree* self, uint32_t count,
|
|
30
30
|
self->index_right_or_value_ = right;
|
31
31
|
}
|
32
32
|
|
33
|
-
/* Returns 1 is assignment of depths
|
33
|
+
/* Returns 1 is assignment of depths succeeded, otherwise 0. */
|
34
34
|
BROTLI_INTERNAL BROTLI_BOOL BrotliSetDepth(
|
35
35
|
int p, HuffmanTree* pool, uint8_t* depth, int max_depth);
|
36
36
|
|
@@ -53,7 +53,7 @@ BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t *data,
|
|
53
53
|
uint8_t *depth);
|
54
54
|
|
55
55
|
/* Change the population counts in a way that the consequent
|
56
|
-
Huffman tree compression, especially its
|
56
|
+
Huffman tree compression, especially its RLE-part will be more
|
57
57
|
likely to compress this data more efficiently.
|
58
58
|
|
59
59
|
length contains the size of the histogram.
|
@@ -62,7 +62,7 @@ BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t *data,
|
|
62
62
|
BROTLI_INTERNAL void BrotliOptimizeHuffmanCountsForRle(
|
63
63
|
size_t length, uint32_t* counts, uint8_t* good_for_rle);
|
64
64
|
|
65
|
-
/* Write a Huffman tree from bit depths into the
|
65
|
+
/* Write a Huffman tree from bit depths into the bit-stream representation
|
66
66
|
of a Huffman tree. The generated Huffman tree is to be compressed once
|
67
67
|
more using a Huffman tree */
|
68
68
|
BROTLI_INTERNAL void BrotliWriteHuffmanTree(const uint8_t* depth,
|
@@ -10,8 +10,8 @@
|
|
10
10
|
#define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
|
11
11
|
|
12
12
|
#include "../common/constants.h"
|
13
|
-
#include
|
14
|
-
#include
|
13
|
+
#include <brotli/port.h>
|
14
|
+
#include <brotli/types.h>
|
15
15
|
#include "./write_bits.h"
|
16
16
|
|
17
17
|
#if defined(__cplusplus) || defined(c_plusplus)
|
@@ -83,7 +83,7 @@ static const uint32_t kCodeLengthBits[18] = {
|
|
83
83
|
static BROTLI_INLINE void StoreStaticCodeLengthCode(
|
84
84
|
size_t* storage_ix, uint8_t* storage) {
|
85
85
|
BrotliWriteBits(
|
86
|
-
40,
|
86
|
+
40, BROTLI_MAKE_UINT64_T(0x0000ffU, 0x55555554U), storage_ix, storage);
|
87
87
|
}
|
88
88
|
|
89
89
|
static const uint64_t kZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
|
@@ -516,7 +516,7 @@ static const uint16_t kStaticCommandCodeBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
|
|
516
516
|
static BROTLI_INLINE void StoreStaticCommandHuffmanTree(
|
517
517
|
size_t* storage_ix, uint8_t* storage) {
|
518
518
|
BrotliWriteBits(
|
519
|
-
56,
|
519
|
+
56, BROTLI_MAKE_UINT64_T(0x926244U, 0x16307003U), storage_ix, storage);
|
520
520
|
BrotliWriteBits(3, 0x00000000U, storage_ix, storage);
|
521
521
|
}
|
522
522
|
|
@@ -11,15 +11,15 @@
|
|
11
11
|
|
12
12
|
#include <math.h>
|
13
13
|
|
14
|
-
#include
|
15
|
-
#include
|
14
|
+
#include <brotli/types.h>
|
15
|
+
#include <brotli/port.h>
|
16
16
|
|
17
17
|
#if defined(__cplusplus) || defined(c_plusplus)
|
18
18
|
extern "C" {
|
19
19
|
#endif
|
20
20
|
|
21
21
|
static BROTLI_INLINE uint32_t Log2FloorNonZero(size_t n) {
|
22
|
-
#
|
22
|
+
#if BROTLI_MODERN_COMPILER || __has_builtin(__builtin_clz)
|
23
23
|
return 31u ^ (uint32_t)__builtin_clz((uint32_t)n);
|
24
24
|
#else
|
25
25
|
uint32_t result = 0;
|
@@ -9,7 +9,7 @@
|
|
9
9
|
#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
|
10
10
|
#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
|
11
11
|
|
12
|
-
#include
|
12
|
+
#include <brotli/types.h>
|
13
13
|
#include "./port.h"
|
14
14
|
|
15
15
|
#if defined(__cplusplus) || defined(c_plusplus)
|
@@ -17,21 +17,21 @@ extern "C" {
|
|
17
17
|
#endif
|
18
18
|
|
19
19
|
/* Separate implementation for little-endian 64-bit targets, for speed. */
|
20
|
-
#if defined(__GNUC__) && defined(_LP64) && defined(
|
20
|
+
#if defined(__GNUC__) && defined(_LP64) && defined(BROTLI_LITTLE_ENDIAN)
|
21
21
|
|
22
22
|
static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
|
23
23
|
const uint8_t* s2,
|
24
24
|
size_t limit) {
|
25
25
|
size_t matched = 0;
|
26
26
|
size_t limit2 = (limit >> 3) + 1; /* + 1 is for pre-decrement in while */
|
27
|
-
while (
|
28
|
-
if (
|
29
|
-
|
27
|
+
while (BROTLI_PREDICT_TRUE(--limit2)) {
|
28
|
+
if (BROTLI_PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64LE(s2) ==
|
29
|
+
BROTLI_UNALIGNED_LOAD64LE(s1 + matched))) {
|
30
30
|
s2 += 8;
|
31
31
|
matched += 8;
|
32
32
|
} else {
|
33
|
-
uint64_t x =
|
34
|
-
|
33
|
+
uint64_t x = BROTLI_UNALIGNED_LOAD64LE(s2) ^
|
34
|
+
BROTLI_UNALIGNED_LOAD64LE(s1 + matched);
|
35
35
|
size_t matching_bits = (size_t)__builtin_ctzll(x);
|
36
36
|
matched += matching_bits >> 3;
|
37
37
|
return matched;
|
@@ -39,7 +39,7 @@ static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
|
|
39
39
|
}
|
40
40
|
limit = (limit & 7) + 1; /* + 1 is for pre-decrement in while */
|
41
41
|
while (--limit) {
|
42
|
-
if (
|
42
|
+
if (BROTLI_PREDICT_TRUE(s1[matched] == *s2)) {
|
43
43
|
++s2;
|
44
44
|
++matched;
|
45
45
|
} else {
|
@@ -0,0 +1,446 @@
|
|
1
|
+
/* Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* A (forgetful) hash table to the data seen by the compressor, to
|
8
|
+
help create backward references to previous data. */
|
9
|
+
|
10
|
+
#ifndef BROTLI_ENC_HASH_H_
|
11
|
+
#define BROTLI_ENC_HASH_H_
|
12
|
+
|
13
|
+
#include <string.h> /* memcmp, memset */
|
14
|
+
|
15
|
+
#include "../common/constants.h"
|
16
|
+
#include "../common/dictionary.h"
|
17
|
+
#include <brotli/types.h>
|
18
|
+
#include "./fast_log.h"
|
19
|
+
#include "./find_match_length.h"
|
20
|
+
#include "./memory.h"
|
21
|
+
#include "./port.h"
|
22
|
+
#include "./quality.h"
|
23
|
+
#include "./static_dict.h"
|
24
|
+
|
25
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
26
|
+
extern "C" {
|
27
|
+
#endif
|
28
|
+
|
29
|
+
/* Pointer to hasher data.
|
30
|
+
*
|
31
|
+
* Excluding initialization and destruction, hasher can be passed as
|
32
|
+
* HasherHandle by value.
|
33
|
+
*
|
34
|
+
* Typically hasher data consists of 3 sections:
|
35
|
+
* * HasherCommon structure
|
36
|
+
* * private structured hasher data, depending on hasher type
|
37
|
+
* * private dynamic hasher data, depending on hasher type and parameters
|
38
|
+
*/
|
39
|
+
typedef uint8_t* HasherHandle;
|
40
|
+
|
41
|
+
typedef struct {
|
42
|
+
BrotliHasherParams params;
|
43
|
+
|
44
|
+
/* False if hasher needs to be "prepared" before use. */
|
45
|
+
BROTLI_BOOL is_prepared_;
|
46
|
+
|
47
|
+
size_t dict_num_lookups;
|
48
|
+
size_t dict_num_matches;
|
49
|
+
} HasherCommon;
|
50
|
+
|
51
|
+
static BROTLI_INLINE HasherCommon* GetHasherCommon(HasherHandle handle) {
|
52
|
+
return (HasherCommon*)handle;
|
53
|
+
}
|
54
|
+
|
55
|
+
#define score_t size_t
|
56
|
+
|
57
|
+
static const uint32_t kCutoffTransformsCount = 10;
|
58
|
+
/* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */
|
59
|
+
/* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
|
60
|
+
static const uint64_t kCutoffTransforms =
|
61
|
+
BROTLI_MAKE_UINT64_T(0x071B520A, 0xDA2D3200);
|
62
|
+
|
63
|
+
typedef struct HasherSearchResult {
|
64
|
+
size_t len;
|
65
|
+
size_t distance;
|
66
|
+
score_t score;
|
67
|
+
int len_code_delta; /* == len_code - len */
|
68
|
+
} HasherSearchResult;
|
69
|
+
|
70
|
+
/* kHashMul32 multiplier has these properties:
|
71
|
+
* The multiplier must be odd. Otherwise we may lose the highest bit.
|
72
|
+
* No long streaks of ones or zeros.
|
73
|
+
* There is no effort to ensure that it is a prime, the oddity is enough
|
74
|
+
for this use.
|
75
|
+
* The number has been tuned heuristically against compression benchmarks. */
|
76
|
+
static const uint32_t kHashMul32 = 0x1e35a7bd;
|
77
|
+
static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1e35a7bd, 0x1e35a7bd);
|
78
|
+
static const uint64_t kHashMul64Long =
|
79
|
+
BROTLI_MAKE_UINT64_T(0x1fe35a7bU, 0xd3579bd3U);
|
80
|
+
|
81
|
+
static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
|
82
|
+
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
|
83
|
+
/* The higher bits contain more mixture from the multiplication,
|
84
|
+
so we take our results from there. */
|
85
|
+
return h >> (32 - 14);
|
86
|
+
}
|
87
|
+
|
88
|
+
static BROTLI_INLINE void PrepareDistanceCache(
|
89
|
+
int* BROTLI_RESTRICT distance_cache, const int num_distances) {
|
90
|
+
if (num_distances > 4) {
|
91
|
+
int last_distance = distance_cache[0];
|
92
|
+
distance_cache[4] = last_distance - 1;
|
93
|
+
distance_cache[5] = last_distance + 1;
|
94
|
+
distance_cache[6] = last_distance - 2;
|
95
|
+
distance_cache[7] = last_distance + 2;
|
96
|
+
distance_cache[8] = last_distance - 3;
|
97
|
+
distance_cache[9] = last_distance + 3;
|
98
|
+
if (num_distances > 10) {
|
99
|
+
int next_last_distance = distance_cache[1];
|
100
|
+
distance_cache[10] = next_last_distance - 1;
|
101
|
+
distance_cache[11] = next_last_distance + 1;
|
102
|
+
distance_cache[12] = next_last_distance - 2;
|
103
|
+
distance_cache[13] = next_last_distance + 2;
|
104
|
+
distance_cache[14] = next_last_distance - 3;
|
105
|
+
distance_cache[15] = next_last_distance + 3;
|
106
|
+
}
|
107
|
+
}
|
108
|
+
}
|
109
|
+
|
110
|
+
#define BROTLI_LITERAL_BYTE_SCORE 135
|
111
|
+
#define BROTLI_DISTANCE_BIT_PENALTY 30
|
112
|
+
/* Score must be positive after applying maximal penalty. */
|
113
|
+
#define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))
|
114
|
+
|
115
|
+
/* Usually, we always choose the longest backward reference. This function
|
116
|
+
allows for exceptions to that rule.
|
117
|
+
|
118
|
+
If we choose a backward reference that is further away, it will
|
119
|
+
usually be coded with more bits. We approximate this by assuming
|
120
|
+
log2(distance). If the distance can be expressed in terms of the
|
121
|
+
last four distances, we use some heuristic constants to estimate
|
122
|
+
the bits cost. For the first up to four literals we use the bit
|
123
|
+
cost of the literals from the literal cost model, after that we
|
124
|
+
use the average bit cost of the cost model.
|
125
|
+
|
126
|
+
This function is used to sometimes discard a longer backward reference
|
127
|
+
when it is not much longer and the bit cost for encoding it is more
|
128
|
+
than the saved literals.
|
129
|
+
|
130
|
+
backward_reference_offset MUST be positive. */
|
131
|
+
static BROTLI_INLINE score_t BackwardReferenceScore(
|
132
|
+
size_t copy_length, size_t backward_reference_offset) {
|
133
|
+
return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length -
|
134
|
+
BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);
|
135
|
+
}
|
136
|
+
|
137
|
+
static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(
|
138
|
+
size_t copy_length) {
|
139
|
+
return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length +
|
140
|
+
BROTLI_SCORE_BASE + 15;
|
141
|
+
}
|
142
|
+
|
143
|
+
static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance(
|
144
|
+
size_t distance_short_code) {
|
145
|
+
return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE);
|
146
|
+
}
|
147
|
+
|
148
|
+
static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
|
149
|
+
const BrotliDictionary* dictionary, size_t item, const uint8_t* data,
|
150
|
+
size_t max_length, size_t max_backward, HasherSearchResult* out) {
|
151
|
+
size_t len;
|
152
|
+
size_t dist;
|
153
|
+
size_t offset;
|
154
|
+
size_t matchlen;
|
155
|
+
size_t backward;
|
156
|
+
score_t score;
|
157
|
+
len = item & 0x1F;
|
158
|
+
dist = item >> 5;
|
159
|
+
offset = dictionary->offsets_by_length[len] + len * dist;
|
160
|
+
if (len > max_length) {
|
161
|
+
return BROTLI_FALSE;
|
162
|
+
}
|
163
|
+
|
164
|
+
matchlen =
|
165
|
+
FindMatchLengthWithLimit(data, &dictionary->data[offset], len);
|
166
|
+
if (matchlen + kCutoffTransformsCount <= len || matchlen == 0) {
|
167
|
+
return BROTLI_FALSE;
|
168
|
+
}
|
169
|
+
{
|
170
|
+
size_t cut = len - matchlen;
|
171
|
+
size_t transform_id =
|
172
|
+
(cut << 2) + (size_t)((kCutoffTransforms >> (cut * 6)) & 0x3F);
|
173
|
+
backward = max_backward + dist + 1 +
|
174
|
+
(transform_id << dictionary->size_bits_by_length[len]);
|
175
|
+
}
|
176
|
+
if (backward >= BROTLI_MAX_DISTANCE) {
|
177
|
+
return BROTLI_FALSE;
|
178
|
+
}
|
179
|
+
score = BackwardReferenceScore(matchlen, backward);
|
180
|
+
if (score < out->score) {
|
181
|
+
return BROTLI_FALSE;
|
182
|
+
}
|
183
|
+
out->len = matchlen;
|
184
|
+
out->len_code_delta = (int)len - (int)matchlen;
|
185
|
+
out->distance = backward;
|
186
|
+
out->score = score;
|
187
|
+
return BROTLI_TRUE;
|
188
|
+
}
|
189
|
+
|
190
|
+
static BROTLI_INLINE void SearchInStaticDictionary(
|
191
|
+
const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
|
192
|
+
HasherHandle handle, const uint8_t* data, size_t max_length,
|
193
|
+
size_t max_backward, HasherSearchResult* out, BROTLI_BOOL shallow) {
|
194
|
+
size_t key;
|
195
|
+
size_t i;
|
196
|
+
HasherCommon* self = GetHasherCommon(handle);
|
197
|
+
if (self->dict_num_matches < (self->dict_num_lookups >> 7)) {
|
198
|
+
return;
|
199
|
+
}
|
200
|
+
key = Hash14(data) << 1;
|
201
|
+
for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) {
|
202
|
+
size_t item = dictionary_hash[key];
|
203
|
+
self->dict_num_lookups++;
|
204
|
+
if (item != 0) {
|
205
|
+
BROTLI_BOOL item_matches = TestStaticDictionaryItem(
|
206
|
+
dictionary, item, data, max_length, max_backward, out);
|
207
|
+
if (item_matches) {
|
208
|
+
self->dict_num_matches++;
|
209
|
+
}
|
210
|
+
}
|
211
|
+
}
|
212
|
+
}
|
213
|
+
|
214
|
+
typedef struct BackwardMatch {
|
215
|
+
uint32_t distance;
|
216
|
+
uint32_t length_and_code;
|
217
|
+
} BackwardMatch;
|
218
|
+
|
219
|
+
static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,
|
220
|
+
size_t dist, size_t len) {
|
221
|
+
self->distance = (uint32_t)dist;
|
222
|
+
self->length_and_code = (uint32_t)(len << 5);
|
223
|
+
}
|
224
|
+
|
225
|
+
static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,
|
226
|
+
size_t dist, size_t len, size_t len_code) {
|
227
|
+
self->distance = (uint32_t)dist;
|
228
|
+
self->length_and_code =
|
229
|
+
(uint32_t)((len << 5) | (len == len_code ? 0 : len_code));
|
230
|
+
}
|
231
|
+
|
232
|
+
static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {
|
233
|
+
return self->length_and_code >> 5;
|
234
|
+
}
|
235
|
+
|
236
|
+
static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
|
237
|
+
size_t code = self->length_and_code & 31;
|
238
|
+
return code ? code : BackwardMatchLength(self);
|
239
|
+
}
|
240
|
+
|
241
|
+
#define EXPAND_CAT(a, b) CAT(a, b)
|
242
|
+
#define CAT(a, b) a ## b
|
243
|
+
#define FN(X) EXPAND_CAT(X, HASHER())
|
244
|
+
|
245
|
+
#define HASHER() H10
|
246
|
+
#define BUCKET_BITS 17
|
247
|
+
#define MAX_TREE_SEARCH_DEPTH 64
|
248
|
+
#define MAX_TREE_COMP_LENGTH 128
|
249
|
+
#include "./hash_to_binary_tree_inc.h" /* NOLINT(build/include) */
|
250
|
+
#undef MAX_TREE_SEARCH_DEPTH
|
251
|
+
#undef MAX_TREE_COMP_LENGTH
|
252
|
+
#undef BUCKET_BITS
|
253
|
+
#undef HASHER
|
254
|
+
/* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */
|
255
|
+
#define MAX_NUM_MATCHES_H10 128
|
256
|
+
|
257
|
+
/* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression
|
258
|
+
a little faster (0.5% - 1%) and it compresses 0.15% better on small text
|
259
|
+
and HTML inputs. */
|
260
|
+
|
261
|
+
#define HASHER() H2
|
262
|
+
#define BUCKET_BITS 16
|
263
|
+
#define BUCKET_SWEEP 1
|
264
|
+
#define HASH_LEN 5
|
265
|
+
#define USE_DICTIONARY 1
|
266
|
+
#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
|
267
|
+
#undef BUCKET_SWEEP
|
268
|
+
#undef USE_DICTIONARY
|
269
|
+
#undef HASHER
|
270
|
+
|
271
|
+
#define HASHER() H3
|
272
|
+
#define BUCKET_SWEEP 2
|
273
|
+
#define USE_DICTIONARY 0
|
274
|
+
#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
|
275
|
+
#undef USE_DICTIONARY
|
276
|
+
#undef BUCKET_SWEEP
|
277
|
+
#undef BUCKET_BITS
|
278
|
+
#undef HASHER
|
279
|
+
|
280
|
+
#define HASHER() H4
|
281
|
+
#define BUCKET_BITS 17
|
282
|
+
#define BUCKET_SWEEP 4
|
283
|
+
#define USE_DICTIONARY 1
|
284
|
+
#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
|
285
|
+
#undef USE_DICTIONARY
|
286
|
+
#undef HASH_LEN
|
287
|
+
#undef BUCKET_SWEEP
|
288
|
+
#undef BUCKET_BITS
|
289
|
+
#undef HASHER
|
290
|
+
|
291
|
+
#define HASHER() H5
|
292
|
+
#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
|
293
|
+
#undef HASHER
|
294
|
+
|
295
|
+
#define HASHER() H6
|
296
|
+
#include "./hash_longest_match64_inc.h" /* NOLINT(build/include) */
|
297
|
+
#undef HASHER
|
298
|
+
|
299
|
+
#define BUCKET_BITS 15
|
300
|
+
|
301
|
+
#define NUM_LAST_DISTANCES_TO_CHECK 4
|
302
|
+
#define NUM_BANKS 1
|
303
|
+
#define BANK_BITS 16
|
304
|
+
#define HASHER() H40
|
305
|
+
#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
|
306
|
+
#undef HASHER
|
307
|
+
#undef NUM_LAST_DISTANCES_TO_CHECK
|
308
|
+
|
309
|
+
#define NUM_LAST_DISTANCES_TO_CHECK 10
|
310
|
+
#define HASHER() H41
|
311
|
+
#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
|
312
|
+
#undef HASHER
|
313
|
+
#undef NUM_LAST_DISTANCES_TO_CHECK
|
314
|
+
#undef NUM_BANKS
|
315
|
+
#undef BANK_BITS
|
316
|
+
|
317
|
+
#define NUM_LAST_DISTANCES_TO_CHECK 16
|
318
|
+
#define NUM_BANKS 512
|
319
|
+
#define BANK_BITS 9
|
320
|
+
#define HASHER() H42
|
321
|
+
#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
|
322
|
+
#undef HASHER
|
323
|
+
#undef NUM_LAST_DISTANCES_TO_CHECK
|
324
|
+
#undef NUM_BANKS
|
325
|
+
#undef BANK_BITS
|
326
|
+
|
327
|
+
#undef BUCKET_BITS
|
328
|
+
|
329
|
+
#define HASHER() H54
|
330
|
+
#define BUCKET_BITS 20
|
331
|
+
#define BUCKET_SWEEP 4
|
332
|
+
#define HASH_LEN 7
|
333
|
+
#define USE_DICTIONARY 0
|
334
|
+
#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
|
335
|
+
#undef USE_DICTIONARY
|
336
|
+
#undef HASH_LEN
|
337
|
+
#undef BUCKET_SWEEP
|
338
|
+
#undef BUCKET_BITS
|
339
|
+
#undef HASHER
|
340
|
+
|
341
|
+
#undef FN
|
342
|
+
#undef CAT
|
343
|
+
#undef EXPAND_CAT
|
344
|
+
|
345
|
+
#define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)
|
346
|
+
#define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
|
347
|
+
|
348
|
+
static BROTLI_INLINE void DestroyHasher(
|
349
|
+
MemoryManager* m, HasherHandle* handle) {
|
350
|
+
if (*handle == NULL) return;
|
351
|
+
BROTLI_FREE(m, *handle);
|
352
|
+
}
|
353
|
+
|
354
|
+
static BROTLI_INLINE void HasherReset(HasherHandle handle) {
|
355
|
+
if (handle == NULL) return;
|
356
|
+
GetHasherCommon(handle)->is_prepared_ = BROTLI_FALSE;
|
357
|
+
}
|
358
|
+
|
359
|
+
static BROTLI_INLINE size_t HasherSize(const BrotliEncoderParams* params,
|
360
|
+
BROTLI_BOOL one_shot, const size_t input_size) {
|
361
|
+
size_t result = sizeof(HasherCommon);
|
362
|
+
switch (params->hasher.type) {
|
363
|
+
#define SIZE_(N) \
|
364
|
+
case N: \
|
365
|
+
result += HashMemAllocInBytesH ## N(params, one_shot, input_size); \
|
366
|
+
break;
|
367
|
+
FOR_ALL_HASHERS(SIZE_)
|
368
|
+
#undef SIZE_
|
369
|
+
default:
|
370
|
+
break;
|
371
|
+
}
|
372
|
+
return result;
|
373
|
+
}
|
374
|
+
|
375
|
+
static BROTLI_INLINE void HasherSetup(MemoryManager* m, HasherHandle* handle,
|
376
|
+
BrotliEncoderParams* params, const uint8_t* data, size_t position,
|
377
|
+
size_t input_size, BROTLI_BOOL is_last) {
|
378
|
+
HasherHandle self = NULL;
|
379
|
+
HasherCommon* common = NULL;
|
380
|
+
BROTLI_BOOL one_shot = (position == 0 && is_last);
|
381
|
+
if (*handle == NULL) {
|
382
|
+
size_t alloc_size;
|
383
|
+
ChooseHasher(params, &params->hasher);
|
384
|
+
alloc_size = HasherSize(params, one_shot, input_size);
|
385
|
+
self = BROTLI_ALLOC(m, uint8_t, alloc_size);
|
386
|
+
if (BROTLI_IS_OOM(m)) return;
|
387
|
+
*handle = self;
|
388
|
+
common = GetHasherCommon(self);
|
389
|
+
common->params = params->hasher;
|
390
|
+
switch (common->params.type) {
|
391
|
+
#define INITIALIZE_(N) \
|
392
|
+
case N: \
|
393
|
+
InitializeH ## N(*handle, params); \
|
394
|
+
break;
|
395
|
+
FOR_ALL_HASHERS(INITIALIZE_);
|
396
|
+
#undef INITIALIZE_
|
397
|
+
default:
|
398
|
+
break;
|
399
|
+
}
|
400
|
+
HasherReset(*handle);
|
401
|
+
}
|
402
|
+
|
403
|
+
self = *handle;
|
404
|
+
common = GetHasherCommon(self);
|
405
|
+
if (!common->is_prepared_) {
|
406
|
+
switch (common->params.type) {
|
407
|
+
#define PREPARE_(N) \
|
408
|
+
case N: \
|
409
|
+
PrepareH ## N(self, one_shot, input_size, data); \
|
410
|
+
break;
|
411
|
+
FOR_ALL_HASHERS(PREPARE_)
|
412
|
+
#undef PREPARE_
|
413
|
+
default: break;
|
414
|
+
}
|
415
|
+
if (position == 0) {
|
416
|
+
common->dict_num_lookups = 0;
|
417
|
+
common->dict_num_matches = 0;
|
418
|
+
}
|
419
|
+
common->is_prepared_ = BROTLI_TRUE;
|
420
|
+
}
|
421
|
+
}
|
422
|
+
|
423
|
+
static BROTLI_INLINE void InitOrStitchToPreviousBlock(
|
424
|
+
MemoryManager* m, HasherHandle* handle, const uint8_t* data, size_t mask,
|
425
|
+
BrotliEncoderParams* params, size_t position, size_t input_size,
|
426
|
+
BROTLI_BOOL is_last) {
|
427
|
+
HasherHandle self;
|
428
|
+
HasherSetup(m, handle, params, data, position, input_size, is_last);
|
429
|
+
if (BROTLI_IS_OOM(m)) return;
|
430
|
+
self = *handle;
|
431
|
+
switch (GetHasherCommon(self)->params.type) {
|
432
|
+
#define INIT_(N) \
|
433
|
+
case N: \
|
434
|
+
StitchToPreviousBlockH ## N(self, input_size, position, data, mask); \
|
435
|
+
break;
|
436
|
+
FOR_ALL_HASHERS(INIT_)
|
437
|
+
#undef INIT_
|
438
|
+
default: break;
|
439
|
+
}
|
440
|
+
}
|
441
|
+
|
442
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
443
|
+
} /* extern "C" */
|
444
|
+
#endif
|
445
|
+
|
446
|
+
#endif /* BROTLI_ENC_HASH_H_ */
|