extbrotli 0.0.1.PROTOTYPE
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +28 -0
- data/README.md +67 -0
- data/Rakefile +158 -0
- data/contrib/brotli/LICENSE +202 -0
- data/contrib/brotli/README.md +18 -0
- data/contrib/brotli/dec/bit_reader.c +55 -0
- data/contrib/brotli/dec/bit_reader.h +256 -0
- data/contrib/brotli/dec/context.h +260 -0
- data/contrib/brotli/dec/decode.c +1573 -0
- data/contrib/brotli/dec/decode.h +160 -0
- data/contrib/brotli/dec/dictionary.h +9494 -0
- data/contrib/brotli/dec/huffman.c +325 -0
- data/contrib/brotli/dec/huffman.h +77 -0
- data/contrib/brotli/dec/port.h +148 -0
- data/contrib/brotli/dec/prefix.h +756 -0
- data/contrib/brotli/dec/state.c +149 -0
- data/contrib/brotli/dec/state.h +185 -0
- data/contrib/brotli/dec/streams.c +99 -0
- data/contrib/brotli/dec/streams.h +100 -0
- data/contrib/brotli/dec/transform.h +315 -0
- data/contrib/brotli/dec/types.h +36 -0
- data/contrib/brotli/enc/backward_references.cc +769 -0
- data/contrib/brotli/enc/backward_references.h +50 -0
- data/contrib/brotli/enc/bit_cost.h +147 -0
- data/contrib/brotli/enc/block_splitter.cc +418 -0
- data/contrib/brotli/enc/block_splitter.h +78 -0
- data/contrib/brotli/enc/brotli_bit_stream.cc +884 -0
- data/contrib/brotli/enc/brotli_bit_stream.h +149 -0
- data/contrib/brotli/enc/cluster.h +290 -0
- data/contrib/brotli/enc/command.h +140 -0
- data/contrib/brotli/enc/context.h +185 -0
- data/contrib/brotli/enc/dictionary.h +9485 -0
- data/contrib/brotli/enc/dictionary_hash.h +4125 -0
- data/contrib/brotli/enc/encode.cc +715 -0
- data/contrib/brotli/enc/encode.h +196 -0
- data/contrib/brotli/enc/encode_parallel.cc +354 -0
- data/contrib/brotli/enc/encode_parallel.h +37 -0
- data/contrib/brotli/enc/entropy_encode.cc +492 -0
- data/contrib/brotli/enc/entropy_encode.h +88 -0
- data/contrib/brotli/enc/fast_log.h +179 -0
- data/contrib/brotli/enc/find_match_length.h +87 -0
- data/contrib/brotli/enc/hash.h +686 -0
- data/contrib/brotli/enc/histogram.cc +76 -0
- data/contrib/brotli/enc/histogram.h +100 -0
- data/contrib/brotli/enc/literal_cost.cc +172 -0
- data/contrib/brotli/enc/literal_cost.h +38 -0
- data/contrib/brotli/enc/metablock.cc +544 -0
- data/contrib/brotli/enc/metablock.h +88 -0
- data/contrib/brotli/enc/port.h +151 -0
- data/contrib/brotli/enc/prefix.h +85 -0
- data/contrib/brotli/enc/ringbuffer.h +108 -0
- data/contrib/brotli/enc/static_dict.cc +441 -0
- data/contrib/brotli/enc/static_dict.h +40 -0
- data/contrib/brotli/enc/static_dict_lut.h +12063 -0
- data/contrib/brotli/enc/streams.cc +127 -0
- data/contrib/brotli/enc/streams.h +129 -0
- data/contrib/brotli/enc/transform.h +250 -0
- data/contrib/brotli/enc/write_bits.h +91 -0
- data/ext/extbrotli.cc +24 -0
- data/ext/extbrotli.h +73 -0
- data/ext/extconf.rb +35 -0
- data/ext/lldecoder.c +220 -0
- data/ext/llencoder.cc +433 -0
- data/gemstub.rb +21 -0
- data/lib/extbrotli.rb +243 -0
- data/lib/extbrotli/version.rb +3 -0
- metadata +140 -0
@@ -0,0 +1,88 @@
|
|
1
|
+
// Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Entropy encoding (Huffman) utilities.
|
16
|
+
|
17
|
+
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
|
18
|
+
#define BROTLI_ENC_ENTROPY_ENCODE_H_
|
19
|
+
|
20
|
+
#include <stdint.h>
|
21
|
+
#include <string.h>
|
22
|
+
#include <vector>
|
23
|
+
#include "./histogram.h"
|
24
|
+
#include "./prefix.h"
|
25
|
+
|
26
|
+
namespace brotli {
|
27
|
+
|
28
|
+
// Builds a Huffman tree from symbol population counts.
//
//   data, length  the population counts and how many of them there are.
//   tree_limit    the maximum bit depth allowed for any Huffman code.
//   depth         receives the tree, expressed as the number of bits used
//                 for each symbol.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
void CreateHuffmanTree(const int *data,
                       const int length,
                       const int tree_limit,
                       uint8_t *depth);
|
41
|
+
|
42
|
+
// Adjusts the population counts so that the subsequent Huffman tree
// compression, in particular its run-length (rle) part, is more likely to
// compress this data efficiently.
//
//   length  the size of the histogram.
//   counts  the population counts.
int OptimizeHuffmanCountsForRle(int length, int* counts);
|
49
|
+
|
50
|
+
// Converts a Huffman tree given as bit depths into the bitstream
// representation of a Huffman tree. The generated representation is meant
// to be compressed once more using a Huffman tree.
void WriteHuffmanTree(const uint8_t* depth,
                      uint32_t num,
                      std::vector<uint8_t> *tree,
                      std::vector<uint8_t> *extra_bits_data);
|
57
|
+
|
58
|
+
// Computes the actual bit values of the codes for a tree given as bit depths.
void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits);
|
60
|
+
|
61
|
+
// An entropy code over an alphabet of kSize symbols.
template<int kSize>
struct EntropyCode {
  uint8_t depth_[kSize];   // Number of bits used for each symbol.
  uint16_t bits_[kSize];   // The actual bits that represent each symbol.
  int count_;              // Number of symbols with a non-zero depth.
  int symbols_[4];         // The first four symbols with a non-zero depth.
};
|
72
|
+
|
73
|
+
static const int kCodeLengthCodes = 18;
|
74
|
+
|
75
|
+
// Literal entropy code.
|
76
|
+
typedef EntropyCode<256> EntropyCodeLiteral;
|
77
|
+
// Prefix entropy codes.
|
78
|
+
typedef EntropyCode<kNumCommandPrefixes> EntropyCodeCommand;
|
79
|
+
typedef EntropyCode<kNumDistancePrefixes> EntropyCodeDistance;
|
80
|
+
typedef EntropyCode<kNumBlockLenPrefixes> EntropyCodeBlockLength;
|
81
|
+
// Context map entropy code, 256 Huffman tree indexes + 16 run length codes.
|
82
|
+
typedef EntropyCode<272> EntropyCodeContextMap;
|
83
|
+
// Block type entropy code, 256 block types + 2 special symbols.
|
84
|
+
typedef EntropyCode<258> EntropyCodeBlockType;
|
85
|
+
|
86
|
+
} // namespace brotli
|
87
|
+
|
88
|
+
#endif // BROTLI_ENC_ENTROPY_ENCODE_H_
|
@@ -0,0 +1,179 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Utilities for fast computation of logarithms.
|
16
|
+
|
17
|
+
#ifndef BROTLI_ENC_FAST_LOG_H_
|
18
|
+
#define BROTLI_ENC_FAST_LOG_H_
|
19
|
+
|
20
|
+
#include <assert.h>
|
21
|
+
#include <math.h>
|
22
|
+
#include <stdint.h>
|
23
|
+
|
24
|
+
namespace brotli {
|
25
|
+
|
26
|
+
// Computes floor(log2(n)) for a positive integer n. Yields -1 iff n == 0.
inline int Log2Floor(uint32_t n) {
  if (n == 0) {
    return -1;
  }
#if defined(__clang__) || \
    (defined(__GNUC__) && \
     ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4))
  // The leading-zero count is in [0, 31] here, so XOR with 31 is the same
  // as 31 minus the count, i.e. the index of the highest set bit.
  return 31 ^ __builtin_clz(n);
#else
  // Binary search for the highest set bit using shifts of 16, 8, 4, 2, 1.
  int result = 0;
  uint32_t rest = n;
  for (int shift = 16; shift > 0; shift >>= 1) {
    const uint32_t upper = rest >> shift;
    if (upper != 0) {
      rest = upper;
      result += shift;
    }
  }
  assert(rest == 1);
  return result;
#endif
}
|
49
|
+
|
50
|
+
// Computes floor(log2(n)) without a special case for zero. Callers are
// expected to pass n != 0: the GCC path uses __builtin_clz, whose result is
// undefined for a zero argument.
static inline int Log2FloorNonZero(uint32_t n) {
#ifdef __GNUC__
  return 31 ^ __builtin_clz(n);
#else
  // Count how many times n can be halved before it reaches zero.
  unsigned int halvings = 0;
  for (n >>= 1; n != 0; n >>= 1) {
    ++halvings;
  }
  return halvings;
#endif
}
|
59
|
+
|
60
|
+
// Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0.
|
61
|
+
inline int Log2Ceiling(uint32_t n) {
|
62
|
+
int floor = Log2Floor(n);
|
63
|
+
if (n == (n &~ (n - 1))) // zero or a power of two
|
64
|
+
return floor;
|
65
|
+
else
|
66
|
+
return floor + 1;
|
67
|
+
}
|
68
|
+
|
69
|
+
// A lookup table for small values of log2(int) to be used in entropy
|
70
|
+
// computation.
|
71
|
+
//
|
72
|
+
// ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]])
|
73
|
+
static const float kLog2Table[] = {
|
74
|
+
0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
|
75
|
+
1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
|
76
|
+
2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
|
77
|
+
3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
|
78
|
+
3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
|
79
|
+
3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
|
80
|
+
4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
|
81
|
+
4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
|
82
|
+
4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
|
83
|
+
4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
|
84
|
+
4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
|
85
|
+
5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
|
86
|
+
5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
|
87
|
+
5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
|
88
|
+
5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
|
89
|
+
5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
|
90
|
+
5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
|
91
|
+
5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
|
92
|
+
5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
|
93
|
+
5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
|
94
|
+
5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
|
95
|
+
5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
|
96
|
+
6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
|
97
|
+
6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
|
98
|
+
6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
|
99
|
+
6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
|
100
|
+
6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
|
101
|
+
6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
|
102
|
+
6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
|
103
|
+
6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
|
104
|
+
6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
|
105
|
+
6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
|
106
|
+
6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
|
107
|
+
6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
|
108
|
+
6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
|
109
|
+
6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
|
110
|
+
6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
|
111
|
+
6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
|
112
|
+
6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
|
113
|
+
6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
|
114
|
+
6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
|
115
|
+
6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
|
116
|
+
6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
|
117
|
+
7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
|
118
|
+
7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
|
119
|
+
7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
|
120
|
+
7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
|
121
|
+
7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
|
122
|
+
7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
|
123
|
+
7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
|
124
|
+
7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
|
125
|
+
7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
|
126
|
+
7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
|
127
|
+
7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
|
128
|
+
7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
|
129
|
+
7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
|
130
|
+
7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
|
131
|
+
7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
|
132
|
+
7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
|
133
|
+
7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
|
134
|
+
7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
|
135
|
+
7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
|
136
|
+
7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
|
137
|
+
7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
|
138
|
+
7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
|
139
|
+
7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
|
140
|
+
7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
|
141
|
+
7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
|
142
|
+
7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
|
143
|
+
7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
|
144
|
+
7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
|
145
|
+
7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
|
146
|
+
7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
|
147
|
+
7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
|
148
|
+
7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
|
149
|
+
7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
|
150
|
+
7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
|
151
|
+
7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
|
152
|
+
7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
|
153
|
+
7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
|
154
|
+
7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
|
155
|
+
7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
|
156
|
+
7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
|
157
|
+
7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
|
158
|
+
7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
|
159
|
+
7.9943534368588578f
|
160
|
+
};
|
161
|
+
|
162
|
+
// Faster logarithm for small integers, with the property of log2(0) == 0.
|
163
|
+
static inline double FastLog2(int v) {
|
164
|
+
if (v < (int)(sizeof(kLog2Table) / sizeof(kLog2Table[0]))) {
|
165
|
+
return kLog2Table[v];
|
166
|
+
}
|
167
|
+
#if defined(_MSC_VER) && _MSC_VER <= 1600
|
168
|
+
// Visual Studio 2010 does not have the log2() function defined, so we use
|
169
|
+
// log() and a multiplication instead.
|
170
|
+
static const double kLog2Inv = 1.4426950408889634f;
|
171
|
+
return log(static_cast<double>(v)) * kLog2Inv;
|
172
|
+
#else
|
173
|
+
return log2(static_cast<double>(v));
|
174
|
+
#endif
|
175
|
+
}
|
176
|
+
|
177
|
+
} // namespace brotli
|
178
|
+
|
179
|
+
#endif // BROTLI_ENC_FAST_LOG_H_
|
@@ -0,0 +1,87 @@
|
|
1
|
+
// Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Function to find maximal matching prefixes of strings.
|
16
|
+
|
17
|
+
#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
|
18
|
+
#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
|
19
|
+
|
20
|
+
#include <stdint.h>
|
21
|
+
|
22
|
+
#include <stddef.h>
|
23
|
+
|
24
|
+
#include "./port.h"
|
25
|
+
|
26
|
+
namespace brotli {
|
27
|
+
|
28
|
+
// Separate implementation for little-endian 64-bit targets, for speed.
|
29
|
+
#if defined(__GNUC__) && defined(_LP64) && defined(IS_LITTLE_ENDIAN)
|
30
|
+
|
31
|
+
static inline int FindMatchLengthWithLimit(const uint8_t* s1,
|
32
|
+
const uint8_t* s2,
|
33
|
+
size_t limit) {
|
34
|
+
int matched = 0;
|
35
|
+
size_t limit2 = (limit >> 3) + 1; // + 1 is for pre-decrement in while
|
36
|
+
while (PREDICT_TRUE(--limit2)) {
|
37
|
+
if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
|
38
|
+
BROTLI_UNALIGNED_LOAD64(s1 + matched))) {
|
39
|
+
s2 += 8;
|
40
|
+
matched += 8;
|
41
|
+
} else {
|
42
|
+
uint64_t x =
|
43
|
+
BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
|
44
|
+
int matching_bits = __builtin_ctzll(x);
|
45
|
+
matched += matching_bits >> 3;
|
46
|
+
return matched;
|
47
|
+
}
|
48
|
+
}
|
49
|
+
limit = (limit & 7) + 1; // + 1 is for pre-decrement in while
|
50
|
+
while (--limit) {
|
51
|
+
if (PREDICT_TRUE(s1[matched] == *s2)) {
|
52
|
+
++s2;
|
53
|
+
++matched;
|
54
|
+
} else {
|
55
|
+
return matched;
|
56
|
+
}
|
57
|
+
}
|
58
|
+
return matched;
|
59
|
+
}
|
60
|
+
#else
|
61
|
+
static inline int FindMatchLengthWithLimit(const uint8_t* s1,
|
62
|
+
const uint8_t* s2,
|
63
|
+
size_t limit) {
|
64
|
+
int matched = 0;
|
65
|
+
const uint8_t* s2_limit = s2 + limit;
|
66
|
+
const uint8_t* s2_ptr = s2;
|
67
|
+
// Find out how long the match is. We loop over the data 32 bits at a
|
68
|
+
// time until we find a 32-bit block that doesn't match; then we find
|
69
|
+
// the first non-matching bit and use that to calculate the total
|
70
|
+
// length of the match.
|
71
|
+
while (s2_ptr <= s2_limit - 4 &&
|
72
|
+
BROTLI_UNALIGNED_LOAD32(s2_ptr) ==
|
73
|
+
BROTLI_UNALIGNED_LOAD32(s1 + matched)) {
|
74
|
+
s2_ptr += 4;
|
75
|
+
matched += 4;
|
76
|
+
}
|
77
|
+
while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
|
78
|
+
++s2_ptr;
|
79
|
+
++matched;
|
80
|
+
}
|
81
|
+
return matched;
|
82
|
+
}
|
83
|
+
#endif
|
84
|
+
|
85
|
+
} // namespace brotli
|
86
|
+
|
87
|
+
#endif // BROTLI_ENC_FIND_MATCH_LENGTH_H_
|
@@ -0,0 +1,686 @@
|
|
1
|
+
// Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// A (forgetful) hash table to the data seen by the compressor, to
|
16
|
+
// help create backward references to previous data.
|
17
|
+
|
18
|
+
#ifndef BROTLI_ENC_HASH_H_
|
19
|
+
#define BROTLI_ENC_HASH_H_
|
20
|
+
|
21
|
+
#include <stddef.h>
|
22
|
+
#include <stdint.h>
|
23
|
+
#include <string.h>
|
24
|
+
#include <sys/types.h>
|
25
|
+
#include <algorithm>
|
26
|
+
#include <cstdlib>
|
27
|
+
#include <memory>
|
28
|
+
#include <string>
|
29
|
+
|
30
|
+
#include "./dictionary_hash.h"
|
31
|
+
#include "./fast_log.h"
|
32
|
+
#include "./find_match_length.h"
|
33
|
+
#include "./port.h"
|
34
|
+
#include "./prefix.h"
|
35
|
+
#include "./static_dict.h"
|
36
|
+
#include "./transform.h"
|
37
|
+
|
38
|
+
namespace brotli {
|
39
|
+
|
40
|
+
// Two parallel 16-entry tables.
// NOTE(review): presumably entry i gives, for distance short code i, the
// distance-cache slot to read and the offset to add to it -- confirm against
// the code that consumes these tables.
static const int kDistanceCacheIndex[16] = {
  0, 1, 2, 3,
  0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1
};
static const int kDistanceCacheOffset[16] = {
  0, 0, 0, 0,
  -1, 1, -2, 2, -3, 3,
  -1, 1, -2, 2, -3, 3
};

// kCutoffTransforms[k] is the transform id used for a dictionary word whose
// match stops k bytes short of the word length; only k below
// kCutoffTransformsCount is supported.
static const int kCutoffTransformsCount = 10;
static const int kCutoffTransforms[kCutoffTransformsCount] = {
  0, 12, 27, 23, 42, 63, 56, 48, 59, 64
};
|
49
|
+
|
50
|
+
// kHashMul32 multiplier has these properties:
|
51
|
+
// * The multiplier must be odd. Otherwise we may lose the highest bit.
|
52
|
+
// * No long streaks of 1s or 0s.
|
53
|
+
// * There is no effort to ensure that it is a prime, the oddity is enough
|
54
|
+
// for this use.
|
55
|
+
// * The number has been tuned heuristically against compression benchmarks.
|
56
|
+
static const uint32_t kHashMul32 = 0x1e35a7bd;
|
57
|
+
|
58
|
+
template<int kShiftBits>
|
59
|
+
inline uint32_t Hash(const uint8_t *data) {
|
60
|
+
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
|
61
|
+
// The higher bits contain more mixture from the multiplication,
|
62
|
+
// so we take our results from there.
|
63
|
+
return h >> (32 - kShiftBits);
|
64
|
+
}
|
65
|
+
|
66
|
+
// Usually, we always choose the longest backward reference. This function
|
67
|
+
// allows for the exception of that rule.
|
68
|
+
//
|
69
|
+
// If we choose a backward reference that is further away, it will
|
70
|
+
// usually be coded with more bits. We approximate this by assuming
|
71
|
+
// log2(distance). If the distance can be expressed in terms of the
|
72
|
+
// last four distances, we use some heuristic constants to estimate
|
73
|
+
// the bits cost. For the first up to four literals we use the bit
|
74
|
+
// cost of the literals from the literal cost model, after that we
|
75
|
+
// use the average bit cost of the cost model.
|
76
|
+
//
|
77
|
+
// This function is used to sometimes discard a longer backward reference
|
78
|
+
// when it is not much longer and the bit cost for encoding it is more
|
79
|
+
// than the saved literals.
|
80
|
+
inline double BackwardReferenceScore(int copy_length,
|
81
|
+
int backward_reference_offset) {
|
82
|
+
return 5.4 * copy_length - 1.20 * Log2Floor(backward_reference_offset);
|
83
|
+
}
|
84
|
+
|
85
|
+
// Scores a backward reference whose distance is expressed through a distance
// short code: the same per-byte gain as BackwardReferenceScore, minus a
// heuristic bit cost looked up by short code. distance_short_code must be in
// [0, 15]; it is not range-checked.
inline double BackwardReferenceScoreUsingLastDistance(int copy_length,
                                                      int distance_short_code) {
  static const double kDistanceShortCodeBitCost[16] = {
    // Codes 0-3.
    -0.6, 0.95, 1.17, 1.27,
    // Codes 4-9.
    0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
    // Codes 10-15.
    1.05, 1.05, 1.15, 1.15, 1.25, 1.25
  };
  return 5.4 * copy_length - kDistanceShortCodeBitCost[distance_short_code];
}
|
94
|
+
|
95
|
+
// A backward match: its distance, plus the copy length and the length code
// packed into one integer. The length occupies the bits above the lowest
// five; the lowest five bits hold the length code, with 0 meaning "same as
// the length".
struct BackwardMatch {
  BackwardMatch() : distance(0), length_and_code(0) {}

  // Match whose length code equals its length.
  BackwardMatch(int dist, int len)
      : distance(dist), length_and_code(len << 5) {}

  // Match with an explicit length code; the code is only stored when it
  // differs from the length.
  BackwardMatch(int dist, int len, int len_code)
      : distance(dist), length_and_code(len << 5) {
    if (len_code != len) {
      length_and_code |= len_code;
    }
  }

  // The copy length.
  int length() const { return length_and_code >> 5; }

  // The length code, falling back to the length when none was stored.
  int length_code() const {
    const int stored_code = length_and_code & 31;
    if (stored_code == 0) {
      return length();
    }
    return stored_code;
  }

  int distance;
  int length_and_code;
};
|
116
|
+
|
117
|
+
// A (forgetful) hash table to the data seen by the compressor, to
|
118
|
+
// help create backward references to previous data.
|
119
|
+
//
|
120
|
+
// This is a hash map of fixed size (kBucketSize). Starting from the
|
121
|
+
// given index, kBucketSweep buckets are used to store values of a key.
|
122
|
+
template <int kBucketBits, int kBucketSweep, bool kUseDictionary>
|
123
|
+
class HashLongestMatchQuickly {
|
124
|
+
public:
|
125
|
+
HashLongestMatchQuickly() {
|
126
|
+
Reset();
|
127
|
+
}
|
128
|
+
void Reset() {
|
129
|
+
// It is not strictly necessary to fill this buffer here, but
|
130
|
+
// not filling will make the results of the compression stochastic
|
131
|
+
// (but correct). This is because random data would cause the
|
132
|
+
// system to find accidentally good backward references here and there.
|
133
|
+
memset(&buckets_[0], 0, sizeof(buckets_));
|
134
|
+
num_dict_lookups_ = 0;
|
135
|
+
num_dict_matches_ = 0;
|
136
|
+
}
|
137
|
+
// Look at 4 bytes at data.
|
138
|
+
// Compute a hash from these, and store the value somewhere within
|
139
|
+
// [ix .. ix+3].
|
140
|
+
inline void Store(const uint8_t *data, const int ix) {
|
141
|
+
const uint32_t key = HashBytes(data);
|
142
|
+
// Wiggle the value with the bucket sweep range.
|
143
|
+
const uint32_t off = (static_cast<uint32_t>(ix) >> 3) % kBucketSweep;
|
144
|
+
buckets_[key + off] = ix;
|
145
|
+
}
|
146
|
+
|
147
|
+
// Store hashes for a range of data.
|
148
|
+
void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
|
149
|
+
for (int p = 0; p < len; ++p) {
|
150
|
+
Store(&data[p & mask], startix + p);
|
151
|
+
}
|
152
|
+
}
|
153
|
+
|
154
|
+
// Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
|
155
|
+
// up to the length of max_length.
|
156
|
+
//
|
157
|
+
// Does not look for matches longer than max_length.
|
158
|
+
// Does not look for matches further away than max_backward.
|
159
|
+
// Writes the best found match length into best_len_out.
|
160
|
+
// Writes the index (&data[index]) of the start of the best match into
|
161
|
+
// best_distance_out.
|
162
|
+
inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer,
|
163
|
+
const size_t ring_buffer_mask,
|
164
|
+
const int* __restrict distance_cache,
|
165
|
+
const uint32_t cur_ix,
|
166
|
+
const uint32_t max_length,
|
167
|
+
const uint32_t max_backward,
|
168
|
+
int * __restrict best_len_out,
|
169
|
+
int * __restrict best_len_code_out,
|
170
|
+
int * __restrict best_distance_out,
|
171
|
+
double* __restrict best_score_out) {
|
172
|
+
const int best_len_in = *best_len_out;
|
173
|
+
const int cur_ix_masked = cur_ix & ring_buffer_mask;
|
174
|
+
int compare_char = ring_buffer[cur_ix_masked + best_len_in];
|
175
|
+
double best_score = *best_score_out;
|
176
|
+
int best_len = best_len_in;
|
177
|
+
int backward = distance_cache[0];
|
178
|
+
size_t prev_ix = cur_ix - backward;
|
179
|
+
bool match_found = false;
|
180
|
+
if (prev_ix < cur_ix) {
|
181
|
+
prev_ix &= ring_buffer_mask;
|
182
|
+
if (compare_char == ring_buffer[prev_ix + best_len]) {
|
183
|
+
int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
|
184
|
+
&ring_buffer[cur_ix_masked],
|
185
|
+
max_length);
|
186
|
+
if (len >= 4) {
|
187
|
+
best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
|
188
|
+
best_len = len;
|
189
|
+
*best_len_out = len;
|
190
|
+
*best_len_code_out = len;
|
191
|
+
*best_distance_out = backward;
|
192
|
+
*best_score_out = best_score;
|
193
|
+
compare_char = ring_buffer[cur_ix_masked + best_len];
|
194
|
+
if (kBucketSweep == 1) {
|
195
|
+
return true;
|
196
|
+
} else {
|
197
|
+
match_found = true;
|
198
|
+
}
|
199
|
+
}
|
200
|
+
}
|
201
|
+
}
|
202
|
+
const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]);
|
203
|
+
if (kBucketSweep == 1) {
|
204
|
+
// Only one to look for, don't bother to prepare for a loop.
|
205
|
+
prev_ix = buckets_[key];
|
206
|
+
backward = cur_ix - prev_ix;
|
207
|
+
prev_ix &= ring_buffer_mask;
|
208
|
+
if (compare_char != ring_buffer[prev_ix + best_len_in]) {
|
209
|
+
return false;
|
210
|
+
}
|
211
|
+
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
212
|
+
return false;
|
213
|
+
}
|
214
|
+
const int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
|
215
|
+
&ring_buffer[cur_ix_masked],
|
216
|
+
max_length);
|
217
|
+
if (len >= 4) {
|
218
|
+
*best_len_out = len;
|
219
|
+
*best_len_code_out = len;
|
220
|
+
*best_distance_out = backward;
|
221
|
+
*best_score_out = BackwardReferenceScore(len, backward);
|
222
|
+
return true;
|
223
|
+
}
|
224
|
+
} else {
|
225
|
+
uint32_t *bucket = buckets_ + key;
|
226
|
+
prev_ix = *bucket++;
|
227
|
+
for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {
|
228
|
+
const int backward = cur_ix - prev_ix;
|
229
|
+
prev_ix &= ring_buffer_mask;
|
230
|
+
if (compare_char != ring_buffer[prev_ix + best_len]) {
|
231
|
+
continue;
|
232
|
+
}
|
233
|
+
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
234
|
+
continue;
|
235
|
+
}
|
236
|
+
const int len =
|
237
|
+
FindMatchLengthWithLimit(&ring_buffer[prev_ix],
|
238
|
+
&ring_buffer[cur_ix_masked],
|
239
|
+
max_length);
|
240
|
+
if (len >= 4) {
|
241
|
+
const double score = BackwardReferenceScore(len, backward);
|
242
|
+
if (best_score < score) {
|
243
|
+
best_score = score;
|
244
|
+
best_len = len;
|
245
|
+
*best_len_out = best_len;
|
246
|
+
*best_len_code_out = best_len;
|
247
|
+
*best_distance_out = backward;
|
248
|
+
*best_score_out = score;
|
249
|
+
compare_char = ring_buffer[cur_ix_masked + best_len];
|
250
|
+
match_found = true;
|
251
|
+
}
|
252
|
+
}
|
253
|
+
}
|
254
|
+
}
|
255
|
+
if (kUseDictionary && !match_found &&
|
256
|
+
num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
|
257
|
+
++num_dict_lookups_;
|
258
|
+
const uint32_t key = Hash<14>(&ring_buffer[cur_ix_masked]) << 1;
|
259
|
+
const uint16_t v = kStaticDictionaryHash[key];
|
260
|
+
if (v > 0) {
|
261
|
+
const int len = v & 31;
|
262
|
+
const int dist = v >> 5;
|
263
|
+
const int offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
|
264
|
+
if (len <= max_length) {
|
265
|
+
const int matchlen =
|
266
|
+
FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
|
267
|
+
&kBrotliDictionary[offset], len);
|
268
|
+
if (matchlen > len - kCutoffTransformsCount && matchlen > 0) {
|
269
|
+
const int transform_id = kCutoffTransforms[len - matchlen];
|
270
|
+
const int word_id =
|
271
|
+
transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
|
272
|
+
dist;
|
273
|
+
const size_t backward = max_backward + word_id + 1;
|
274
|
+
const double score = BackwardReferenceScore(matchlen, backward);
|
275
|
+
if (best_score < score) {
|
276
|
+
++num_dict_matches_;
|
277
|
+
best_score = score;
|
278
|
+
best_len = matchlen;
|
279
|
+
*best_len_out = best_len;
|
280
|
+
*best_len_code_out = len;
|
281
|
+
*best_distance_out = backward;
|
282
|
+
*best_score_out = best_score;
|
283
|
+
return true;
|
284
|
+
}
|
285
|
+
}
|
286
|
+
}
|
287
|
+
}
|
288
|
+
}
|
289
|
+
return match_found;
|
290
|
+
}
|
291
|
+
|
292
|
+
enum { kHashLength = 5 };
|
293
|
+
enum { kHashTypeLength = 8 };
|
294
|
+
// HashBytes is the function that chooses the bucket to place
|
295
|
+
// the address in. The HashLongestMatch and HashLongestMatchQuickly
|
296
|
+
// classes have separate, different implementations of hashing.
|
297
|
+
static uint32_t HashBytes(const uint8_t *data) {
|
298
|
+
// Computing a hash based on 5 bytes works much better for
|
299
|
+
// qualities 1 and 3, where the next hash value is likely to replace
|
300
|
+
static const uint32_t kHashMul32 = 0x1e35a7bd;
|
301
|
+
uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32;
|
302
|
+
// The higher bits contain more mixture from the multiplication,
|
303
|
+
// so we take our results from there.
|
304
|
+
return h >> (64 - kBucketBits);
|
305
|
+
}
|
306
|
+
|
307
|
+
private:
|
308
|
+
static const uint32_t kBucketSize = 1 << kBucketBits;
|
309
|
+
uint32_t buckets_[kBucketSize + kBucketSweep];
|
310
|
+
size_t num_dict_lookups_;
|
311
|
+
size_t num_dict_matches_;
|
312
|
+
};
|
313
|
+
|
314
|
+
// Longest match length for which the zopflification still keeps distinct
// distances. HashLongestMatch::FindAllMatches discards all previously
// collected matches as soon as it sees a match longer than this.
static const int kMaxZopfliLen = 325;
|
316
|
+
|
317
|
+
// A (forgetful) hash table to the data seen by the compressor, to
|
318
|
+
// help create backward references to previous data.
|
319
|
+
//
|
320
|
+
// This is a hash map of fixed size (kBucketSize) to a ring buffer of
|
321
|
+
// fixed size (kBlockSize). The ring buffer contains the last kBlockSize
|
322
|
+
// index positions of the given hash key in the compressed data.
|
323
|
+
template <int kBucketBits,
|
324
|
+
int kBlockBits,
|
325
|
+
int kNumLastDistancesToCheck>
|
326
|
+
class HashLongestMatch {
|
327
|
+
public:
|
328
|
+
// Builds an empty table; all initialisation is delegated to Reset().
HashLongestMatch() { Reset(); }
|
331
|
+
|
332
|
+
// Forgets every stored position and clears the dictionary statistics.
// buckets_ itself is not touched: the lookups walk a bucket only as far as
// its num_ counter says, and every counter is zero after this call.
void Reset() {
  num_dict_matches_ = 0;
  num_dict_lookups_ = 0;
  memset(num_, 0, sizeof(num_));
}
|
337
|
+
|
338
|
+
// Look at 3 bytes at data.
|
339
|
+
// Compute a hash from these, and store the value of ix at that position.
|
340
|
+
inline void Store(const uint8_t *data, const int ix) {
|
341
|
+
const uint32_t key = HashBytes(data);
|
342
|
+
const int minor_ix = num_[key] & kBlockMask;
|
343
|
+
buckets_[key][minor_ix] = ix;
|
344
|
+
++num_[key];
|
345
|
+
}
|
346
|
+
|
347
|
+
// Store hashes for a range of data.
|
348
|
+
void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
|
349
|
+
for (int p = 0; p < len; ++p) {
|
350
|
+
Store(&data[p & mask], startix + p);
|
351
|
+
}
|
352
|
+
}
|
353
|
+
|
354
|
+
// Find a longest backward match of &data[cur_ix] up to the length of
|
355
|
+
// max_length.
|
356
|
+
//
|
357
|
+
// Does not look for matches longer than max_length.
|
358
|
+
// Does not look for matches further away than max_backward.
|
359
|
+
// Writes the best found match length into best_len_out.
|
360
|
+
// Writes the index (&data[index]) offset from the start of the best match
|
361
|
+
// into best_distance_out.
|
362
|
+
// Write the score of the best match into best_score_out.
|
363
|
+
bool FindLongestMatch(const uint8_t * __restrict data,
|
364
|
+
const size_t ring_buffer_mask,
|
365
|
+
const int* __restrict distance_cache,
|
366
|
+
const uint32_t cur_ix,
|
367
|
+
uint32_t max_length,
|
368
|
+
const uint32_t max_backward,
|
369
|
+
int * __restrict best_len_out,
|
370
|
+
int * __restrict best_len_code_out,
|
371
|
+
int * __restrict best_distance_out,
|
372
|
+
double * __restrict best_score_out) {
|
373
|
+
*best_len_code_out = 0;
|
374
|
+
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
375
|
+
bool match_found = false;
|
376
|
+
// Don't accept a short copy from far away.
|
377
|
+
double best_score = *best_score_out;
|
378
|
+
int best_len = *best_len_out;
|
379
|
+
*best_len_out = 0;
|
380
|
+
// Try last distance first.
|
381
|
+
for (int i = 0; i < kNumLastDistancesToCheck; ++i) {
|
382
|
+
const int idx = kDistanceCacheIndex[i];
|
383
|
+
const int backward = distance_cache[idx] + kDistanceCacheOffset[i];
|
384
|
+
size_t prev_ix = cur_ix - backward;
|
385
|
+
if (prev_ix >= cur_ix) {
|
386
|
+
continue;
|
387
|
+
}
|
388
|
+
if (PREDICT_FALSE(backward > max_backward)) {
|
389
|
+
continue;
|
390
|
+
}
|
391
|
+
prev_ix &= ring_buffer_mask;
|
392
|
+
|
393
|
+
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
394
|
+
prev_ix + best_len > ring_buffer_mask ||
|
395
|
+
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
396
|
+
continue;
|
397
|
+
}
|
398
|
+
const size_t len =
|
399
|
+
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
400
|
+
max_length);
|
401
|
+
if (len >= 3 || (len == 2 && i < 2)) {
|
402
|
+
// Comparing for >= 2 does not change the semantics, but just saves for
|
403
|
+
// a few unnecessary binary logarithms in backward reference score,
|
404
|
+
// since we are not interested in such short matches.
|
405
|
+
double score = BackwardReferenceScoreUsingLastDistance(len, i);
|
406
|
+
if (best_score < score) {
|
407
|
+
best_score = score;
|
408
|
+
best_len = len;
|
409
|
+
*best_len_out = best_len;
|
410
|
+
*best_len_code_out = best_len;
|
411
|
+
*best_distance_out = backward;
|
412
|
+
*best_score_out = best_score;
|
413
|
+
match_found = true;
|
414
|
+
}
|
415
|
+
}
|
416
|
+
}
|
417
|
+
const uint32_t key = HashBytes(&data[cur_ix_masked]);
|
418
|
+
const int * __restrict const bucket = &buckets_[key][0];
|
419
|
+
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
|
420
|
+
for (int i = num_[key] - 1; i >= down; --i) {
|
421
|
+
int prev_ix = bucket[i & kBlockMask];
|
422
|
+
if (prev_ix >= 0) {
|
423
|
+
const size_t backward = cur_ix - prev_ix;
|
424
|
+
if (PREDICT_FALSE(backward > max_backward)) {
|
425
|
+
break;
|
426
|
+
}
|
427
|
+
prev_ix &= ring_buffer_mask;
|
428
|
+
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
429
|
+
prev_ix + best_len > ring_buffer_mask ||
|
430
|
+
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
431
|
+
continue;
|
432
|
+
}
|
433
|
+
const size_t len =
|
434
|
+
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
435
|
+
max_length);
|
436
|
+
if (len >= 4) {
|
437
|
+
// Comparing for >= 3 does not change the semantics, but just saves
|
438
|
+
// for a few unnecessary binary logarithms in backward reference
|
439
|
+
// score, since we are not interested in such short matches.
|
440
|
+
double score = BackwardReferenceScore(len, backward);
|
441
|
+
if (best_score < score) {
|
442
|
+
best_score = score;
|
443
|
+
best_len = len;
|
444
|
+
*best_len_out = best_len;
|
445
|
+
*best_len_code_out = best_len;
|
446
|
+
*best_distance_out = backward;
|
447
|
+
*best_score_out = best_score;
|
448
|
+
match_found = true;
|
449
|
+
}
|
450
|
+
}
|
451
|
+
}
|
452
|
+
}
|
453
|
+
if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
|
454
|
+
uint32_t key = Hash<14>(&data[cur_ix_masked]) << 1;
|
455
|
+
for (int k = 0; k < 2; ++k, ++key) {
|
456
|
+
++num_dict_lookups_;
|
457
|
+
const uint16_t v = kStaticDictionaryHash[key];
|
458
|
+
if (v > 0) {
|
459
|
+
const int len = v & 31;
|
460
|
+
const int dist = v >> 5;
|
461
|
+
const int offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
|
462
|
+
if (len <= max_length) {
|
463
|
+
const int matchlen =
|
464
|
+
FindMatchLengthWithLimit(&data[cur_ix_masked],
|
465
|
+
&kBrotliDictionary[offset], len);
|
466
|
+
if (matchlen > len - kCutoffTransformsCount && matchlen > 0) {
|
467
|
+
const int transform_id = kCutoffTransforms[len - matchlen];
|
468
|
+
const int word_id =
|
469
|
+
transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
|
470
|
+
dist;
|
471
|
+
const size_t backward = max_backward + word_id + 1;
|
472
|
+
double score = BackwardReferenceScore(matchlen, backward);
|
473
|
+
if (best_score < score) {
|
474
|
+
++num_dict_matches_;
|
475
|
+
best_score = score;
|
476
|
+
best_len = matchlen;
|
477
|
+
*best_len_out = best_len;
|
478
|
+
*best_len_code_out = len;
|
479
|
+
*best_distance_out = backward;
|
480
|
+
*best_score_out = best_score;
|
481
|
+
match_found = true;
|
482
|
+
}
|
483
|
+
}
|
484
|
+
}
|
485
|
+
}
|
486
|
+
}
|
487
|
+
}
|
488
|
+
return match_found;
|
489
|
+
}
|
490
|
+
|
491
|
+
// Similar to FindLongestMatch(), but finds all matches.
|
492
|
+
//
|
493
|
+
// Sets *num_matches to the number of matches found, and stores the found
|
494
|
+
// matches in matches[0] to matches[*num_matches - 1].
|
495
|
+
//
|
496
|
+
// If the longest match is longer than kMaxZopfliLen, returns only this
|
497
|
+
// longest match.
|
498
|
+
//
|
499
|
+
// Requires that at least kMaxZopfliLen space is available in matches.
|
500
|
+
void FindAllMatches(const uint8_t* data,
|
501
|
+
const size_t ring_buffer_mask,
|
502
|
+
const uint32_t cur_ix,
|
503
|
+
uint32_t max_length,
|
504
|
+
const uint32_t max_backward,
|
505
|
+
int* num_matches,
|
506
|
+
BackwardMatch* matches) const {
|
507
|
+
BackwardMatch* const orig_matches = matches;
|
508
|
+
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
509
|
+
int best_len = 1;
|
510
|
+
int stop = static_cast<int>(cur_ix) - 64;
|
511
|
+
if (stop < 0) { stop = 0; }
|
512
|
+
for (int i = cur_ix - 1; i > stop && best_len <= 2; --i) {
|
513
|
+
size_t prev_ix = i;
|
514
|
+
const size_t backward = cur_ix - prev_ix;
|
515
|
+
if (PREDICT_FALSE(backward > max_backward)) {
|
516
|
+
break;
|
517
|
+
}
|
518
|
+
prev_ix &= ring_buffer_mask;
|
519
|
+
if (data[cur_ix_masked] != data[prev_ix] ||
|
520
|
+
data[cur_ix_masked + 1] != data[prev_ix + 1]) {
|
521
|
+
continue;
|
522
|
+
}
|
523
|
+
const size_t len =
|
524
|
+
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
525
|
+
max_length);
|
526
|
+
if (len > best_len) {
|
527
|
+
best_len = len;
|
528
|
+
if (len > kMaxZopfliLen) {
|
529
|
+
matches = orig_matches;
|
530
|
+
}
|
531
|
+
*matches++ = BackwardMatch(backward, len);
|
532
|
+
}
|
533
|
+
}
|
534
|
+
const uint32_t key = HashBytes(&data[cur_ix_masked]);
|
535
|
+
const int * __restrict const bucket = &buckets_[key][0];
|
536
|
+
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
|
537
|
+
for (int i = num_[key] - 1; i >= down; --i) {
|
538
|
+
int prev_ix = bucket[i & kBlockMask];
|
539
|
+
if (prev_ix >= 0) {
|
540
|
+
const size_t backward = cur_ix - prev_ix;
|
541
|
+
if (PREDICT_FALSE(backward > max_backward)) {
|
542
|
+
break;
|
543
|
+
}
|
544
|
+
prev_ix &= ring_buffer_mask;
|
545
|
+
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
546
|
+
prev_ix + best_len > ring_buffer_mask ||
|
547
|
+
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
548
|
+
continue;
|
549
|
+
}
|
550
|
+
const size_t len =
|
551
|
+
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
552
|
+
max_length);
|
553
|
+
if (len > best_len) {
|
554
|
+
best_len = len;
|
555
|
+
if (len > kMaxZopfliLen) {
|
556
|
+
matches = orig_matches;
|
557
|
+
}
|
558
|
+
*matches++ = BackwardMatch(backward, len);
|
559
|
+
}
|
560
|
+
}
|
561
|
+
}
|
562
|
+
std::vector<int> dict_matches(kMaxDictionaryMatchLen + 1, kInvalidMatch);
|
563
|
+
int minlen = std::max<int>(4, best_len + 1);
|
564
|
+
if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
|
565
|
+
&dict_matches[0])) {
|
566
|
+
int maxlen = std::min<int>(kMaxDictionaryMatchLen, max_length);
|
567
|
+
for (int l = minlen; l <= maxlen; ++l) {
|
568
|
+
int dict_id = dict_matches[l];
|
569
|
+
if (dict_id < kInvalidMatch) {
|
570
|
+
*matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
|
571
|
+
dict_id & 31);
|
572
|
+
}
|
573
|
+
}
|
574
|
+
}
|
575
|
+
*num_matches += matches - orig_matches;
|
576
|
+
}
|
577
|
+
|
578
|
+
// Number of input bytes that contribute to one hash value; HashBytes of this
// class hashes one 32-bit load.
enum { kHashLength = 4 };

// Number of bytes HashBytes reads from the input. Hashers::WarmupHash uses it
// to stop before the read would pass the end of the buffer.
enum { kHashTypeLength = 4 };
|
580
|
+
|
581
|
+
// HashBytes is the function that chooses the bucket to place
|
582
|
+
// the address in. The HashLongestMatch and HashLongestMatchQuickly
|
583
|
+
// classes have separate, different implementations of hashing.
|
584
|
+
static uint32_t HashBytes(const uint8_t *data) {
|
585
|
+
// kHashMul32 multiplier has these properties:
|
586
|
+
// * The multiplier must be odd. Otherwise we may lose the highest bit.
|
587
|
+
// * No long streaks of 1s or 0s.
|
588
|
+
// * Is not unfortunate (see the unittest) for the English language.
|
589
|
+
// * There is no effort to ensure that it is a prime, the oddity is enough
|
590
|
+
// for this use.
|
591
|
+
// * The number has been tuned heuristically against compression benchmarks.
|
592
|
+
static const uint32_t kHashMul32 = 0x1e35a7bd;
|
593
|
+
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
|
594
|
+
// The higher bits contain more mixture from the multiplication,
|
595
|
+
// so we take our results from there.
|
596
|
+
return h >> (32 - kBucketBits);
|
597
|
+
}
|
598
|
+
|
599
|
+
private:
|
600
|
+
// Number of hash buckets.
|
601
|
+
static const uint32_t kBucketSize = 1 << kBucketBits;
|
602
|
+
|
603
|
+
// Only kBlockSize newest backward references are kept,
|
604
|
+
// and the older are forgotten.
|
605
|
+
static const uint32_t kBlockSize = 1 << kBlockBits;
|
606
|
+
|
607
|
+
// Mask for accessing entries in a block (in a ringbuffer manner).
|
608
|
+
static const uint32_t kBlockMask = (1 << kBlockBits) - 1;
|
609
|
+
|
610
|
+
// Number of entries in a particular bucket.
|
611
|
+
uint16_t num_[kBucketSize];
|
612
|
+
|
613
|
+
// Buckets containing kBlockSize of backward references.
|
614
|
+
int buckets_[kBucketSize][kBlockSize];
|
615
|
+
|
616
|
+
size_t num_dict_lookups_;
|
617
|
+
size_t num_dict_matches_;
|
618
|
+
};
|
619
|
+
|
620
|
+
struct Hashers {
|
621
|
+
// For kBucketSweep == 1, enabling the dictionary lookup makes compression
|
622
|
+
// a little faster (0.5% - 1%) and it compresses 0.15% better on small text
|
623
|
+
// and html inputs.
|
624
|
+
typedef HashLongestMatchQuickly<16, 1, true> H1;
|
625
|
+
typedef HashLongestMatchQuickly<16, 2, false> H2;
|
626
|
+
typedef HashLongestMatchQuickly<16, 4, false> H3;
|
627
|
+
typedef HashLongestMatchQuickly<17, 4, true> H4;
|
628
|
+
typedef HashLongestMatch<14, 4, 4> H5;
|
629
|
+
typedef HashLongestMatch<14, 5, 4> H6;
|
630
|
+
typedef HashLongestMatch<15, 6, 10> H7;
|
631
|
+
typedef HashLongestMatch<15, 7, 10> H8;
|
632
|
+
typedef HashLongestMatch<15, 8, 16> H9;
|
633
|
+
|
634
|
+
void Init(int type) {
|
635
|
+
switch (type) {
|
636
|
+
case 1: hash_h1.reset(new H1); break;
|
637
|
+
case 2: hash_h2.reset(new H2); break;
|
638
|
+
case 3: hash_h3.reset(new H3); break;
|
639
|
+
case 4: hash_h4.reset(new H4); break;
|
640
|
+
case 5: hash_h5.reset(new H5); break;
|
641
|
+
case 6: hash_h6.reset(new H6); break;
|
642
|
+
case 7: hash_h7.reset(new H7); break;
|
643
|
+
case 8: hash_h8.reset(new H8); break;
|
644
|
+
case 9: hash_h9.reset(new H9); break;
|
645
|
+
default: break;
|
646
|
+
}
|
647
|
+
}
|
648
|
+
|
649
|
+
// Feeds every position of a custom dictionary into |hasher|.
//
// For each index i at which a full Hasher::kHashTypeLength-byte read still
// fits inside the |size| bytes of |dict|, the bytes starting at dict[i] are
// hashed and stored under position i. A dictionary shorter than
// kHashTypeLength bytes stores nothing.
//
// The data pointer has to advance together with i: passing |dict| itself on
// every iteration would hash the first bytes of the dictionary over and over
// and file every position under that single key.
//
// |hasher| must not be null.
template<typename Hasher>
void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {
  for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) {
    hasher->Store(&dict[i], static_cast<int>(i));
  }
}
|
655
|
+
|
656
|
+
// Custom LZ77 window.
|
657
|
+
void PrependCustomDictionary(
|
658
|
+
int type, const size_t size, const uint8_t* dict) {
|
659
|
+
switch (type) {
|
660
|
+
case 1: WarmupHash(size, dict, hash_h1.get()); break;
|
661
|
+
case 2: WarmupHash(size, dict, hash_h2.get()); break;
|
662
|
+
case 3: WarmupHash(size, dict, hash_h3.get()); break;
|
663
|
+
case 4: WarmupHash(size, dict, hash_h4.get()); break;
|
664
|
+
case 5: WarmupHash(size, dict, hash_h5.get()); break;
|
665
|
+
case 6: WarmupHash(size, dict, hash_h6.get()); break;
|
666
|
+
case 7: WarmupHash(size, dict, hash_h7.get()); break;
|
667
|
+
case 8: WarmupHash(size, dict, hash_h8.get()); break;
|
668
|
+
case 9: WarmupHash(size, dict, hash_h9.get()); break;
|
669
|
+
default: break;
|
670
|
+
}
|
671
|
+
}
|
672
|
+
|
673
|
+
std::unique_ptr<H1> hash_h1;
|
674
|
+
std::unique_ptr<H2> hash_h2;
|
675
|
+
std::unique_ptr<H3> hash_h3;
|
676
|
+
std::unique_ptr<H4> hash_h4;
|
677
|
+
std::unique_ptr<H5> hash_h5;
|
678
|
+
std::unique_ptr<H6> hash_h6;
|
679
|
+
std::unique_ptr<H7> hash_h7;
|
680
|
+
std::unique_ptr<H8> hash_h8;
|
681
|
+
std::unique_ptr<H9> hash_h9;
|
682
|
+
};
|
683
|
+
|
684
|
+
} // namespace brotli
|
685
|
+
|
686
|
+
#endif // BROTLI_ENC_HASH_H_
|