extbrotli 0.0.1.PROTOTYPE
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +28 -0
- data/README.md +67 -0
- data/Rakefile +158 -0
- data/contrib/brotli/LICENSE +202 -0
- data/contrib/brotli/README.md +18 -0
- data/contrib/brotli/dec/bit_reader.c +55 -0
- data/contrib/brotli/dec/bit_reader.h +256 -0
- data/contrib/brotli/dec/context.h +260 -0
- data/contrib/brotli/dec/decode.c +1573 -0
- data/contrib/brotli/dec/decode.h +160 -0
- data/contrib/brotli/dec/dictionary.h +9494 -0
- data/contrib/brotli/dec/huffman.c +325 -0
- data/contrib/brotli/dec/huffman.h +77 -0
- data/contrib/brotli/dec/port.h +148 -0
- data/contrib/brotli/dec/prefix.h +756 -0
- data/contrib/brotli/dec/state.c +149 -0
- data/contrib/brotli/dec/state.h +185 -0
- data/contrib/brotli/dec/streams.c +99 -0
- data/contrib/brotli/dec/streams.h +100 -0
- data/contrib/brotli/dec/transform.h +315 -0
- data/contrib/brotli/dec/types.h +36 -0
- data/contrib/brotli/enc/backward_references.cc +769 -0
- data/contrib/brotli/enc/backward_references.h +50 -0
- data/contrib/brotli/enc/bit_cost.h +147 -0
- data/contrib/brotli/enc/block_splitter.cc +418 -0
- data/contrib/brotli/enc/block_splitter.h +78 -0
- data/contrib/brotli/enc/brotli_bit_stream.cc +884 -0
- data/contrib/brotli/enc/brotli_bit_stream.h +149 -0
- data/contrib/brotli/enc/cluster.h +290 -0
- data/contrib/brotli/enc/command.h +140 -0
- data/contrib/brotli/enc/context.h +185 -0
- data/contrib/brotli/enc/dictionary.h +9485 -0
- data/contrib/brotli/enc/dictionary_hash.h +4125 -0
- data/contrib/brotli/enc/encode.cc +715 -0
- data/contrib/brotli/enc/encode.h +196 -0
- data/contrib/brotli/enc/encode_parallel.cc +354 -0
- data/contrib/brotli/enc/encode_parallel.h +37 -0
- data/contrib/brotli/enc/entropy_encode.cc +492 -0
- data/contrib/brotli/enc/entropy_encode.h +88 -0
- data/contrib/brotli/enc/fast_log.h +179 -0
- data/contrib/brotli/enc/find_match_length.h +87 -0
- data/contrib/brotli/enc/hash.h +686 -0
- data/contrib/brotli/enc/histogram.cc +76 -0
- data/contrib/brotli/enc/histogram.h +100 -0
- data/contrib/brotli/enc/literal_cost.cc +172 -0
- data/contrib/brotli/enc/literal_cost.h +38 -0
- data/contrib/brotli/enc/metablock.cc +544 -0
- data/contrib/brotli/enc/metablock.h +88 -0
- data/contrib/brotli/enc/port.h +151 -0
- data/contrib/brotli/enc/prefix.h +85 -0
- data/contrib/brotli/enc/ringbuffer.h +108 -0
- data/contrib/brotli/enc/static_dict.cc +441 -0
- data/contrib/brotli/enc/static_dict.h +40 -0
- data/contrib/brotli/enc/static_dict_lut.h +12063 -0
- data/contrib/brotli/enc/streams.cc +127 -0
- data/contrib/brotli/enc/streams.h +129 -0
- data/contrib/brotli/enc/transform.h +250 -0
- data/contrib/brotli/enc/write_bits.h +91 -0
- data/ext/extbrotli.cc +24 -0
- data/ext/extbrotli.h +73 -0
- data/ext/extconf.rb +35 -0
- data/ext/lldecoder.c +220 -0
- data/ext/llencoder.cc +433 -0
- data/gemstub.rb +21 -0
- data/lib/extbrotli.rb +243 -0
- data/lib/extbrotli/version.rb +3 -0
- metadata +140 -0
@@ -0,0 +1,88 @@
|
|
1
|
+
// Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Entropy encoding (Huffman) utilities.
|
16
|
+
|
17
|
+
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
|
18
|
+
#define BROTLI_ENC_ENTROPY_ENCODE_H_
|
19
|
+
|
20
|
+
#include <stdint.h>
|
21
|
+
#include <string.h>
|
22
|
+
#include <vector>
|
23
|
+
#include "./histogram.h"
|
24
|
+
#include "./prefix.h"
|
25
|
+
|
26
|
+
namespace brotli {
|
27
|
+
|
28
|
+
// This function will create a Huffman tree.
|
29
|
+
//
|
30
|
+
// The (data,length) contains the population counts.
|
31
|
+
// The tree_limit is the maximum bit depth of the Huffman codes.
|
32
|
+
//
|
33
|
+
// The depth contains the tree, i.e., how many bits are used for
|
34
|
+
// the symbol.
|
35
|
+
//
|
36
|
+
// See http://en.wikipedia.org/wiki/Huffman_coding
|
37
|
+
void CreateHuffmanTree(const int *data,
|
38
|
+
const int length,
|
39
|
+
const int tree_limit,
|
40
|
+
uint8_t *depth);
|
41
|
+
|
42
|
+
// Change the population counts in a way that the subsequent
|
43
|
+
// Huffman tree compression, especially its RLE part, will be more
|
44
|
+
// likely to compress this data more efficiently.
|
45
|
+
//
|
46
|
+
// length contains the size of the histogram.
|
47
|
+
// counts contains the population counts.
|
48
|
+
int OptimizeHuffmanCountsForRle(int length, int* counts);
|
49
|
+
|
50
|
+
// Write a huffman tree from bit depths into the bitstream representation
|
51
|
+
// of a Huffman tree. The generated Huffman tree is to be compressed once
|
52
|
+
// more using a Huffman tree.
|
53
|
+
void WriteHuffmanTree(const uint8_t* depth,
|
54
|
+
uint32_t num,
|
55
|
+
std::vector<uint8_t> *tree,
|
56
|
+
std::vector<uint8_t> *extra_bits_data);
|
57
|
+
|
58
|
+
// Get the actual bit values for a tree of bit depths.
|
59
|
+
void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits);
|
60
|
+
|
61
|
+
// Description of an entropy (prefix) code over an alphabet of kSize symbols.
template<int kSize>
struct EntropyCode {
  uint8_t depth_[kSize];   // Number of bits used for each symbol.
  uint16_t bits_[kSize];   // The actual bits that represent each symbol.
  int count_;              // Number of symbols that have a non-zero depth.
  int symbols_[4];         // The first four symbols with a non-zero depth.
};
|
72
|
+
|
73
|
+
static const int kCodeLengthCodes = 18;
|
74
|
+
|
75
|
+
// Literal entropy code.
|
76
|
+
typedef EntropyCode<256> EntropyCodeLiteral;
|
77
|
+
// Prefix entropy codes.
|
78
|
+
typedef EntropyCode<kNumCommandPrefixes> EntropyCodeCommand;
|
79
|
+
typedef EntropyCode<kNumDistancePrefixes> EntropyCodeDistance;
|
80
|
+
typedef EntropyCode<kNumBlockLenPrefixes> EntropyCodeBlockLength;
|
81
|
+
// Context map entropy code, 256 Huffman tree indexes + 16 run length codes.
|
82
|
+
typedef EntropyCode<272> EntropyCodeContextMap;
|
83
|
+
// Block type entropy code, 256 block types + 2 special symbols.
|
84
|
+
typedef EntropyCode<258> EntropyCodeBlockType;
|
85
|
+
|
86
|
+
} // namespace brotli
|
87
|
+
|
88
|
+
#endif // BROTLI_ENC_ENTROPY_ENCODE_H_
|
@@ -0,0 +1,179 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Utilities for fast computation of logarithms.
|
16
|
+
|
17
|
+
#ifndef BROTLI_ENC_FAST_LOG_H_
|
18
|
+
#define BROTLI_ENC_FAST_LOG_H_
|
19
|
+
|
20
|
+
#include <assert.h>
|
21
|
+
#include <math.h>
|
22
|
+
#include <stdint.h>
|
23
|
+
|
24
|
+
namespace brotli {
|
25
|
+
|
26
|
+
// Integer base-2 logarithm, rounded down.
// Returns floor(log2(n)) for n > 0, and -1 if and only if n == 0.
inline int Log2Floor(uint32_t n) {
  if (n == 0) {
    return -1;
  }
#if defined(__clang__) ||                      \
    (defined(__GNUC__) &&                      \
     ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4))
  // For a non-zero n, 31 ^ clz(n) is 31 - clz(n): the index of the top set bit.
  return 31 ^ __builtin_clz(n);
#else
  // Binary search for the top set bit using shifts of 16, 8, 4, 2 and 1.
  int result = 0;
  uint32_t remaining = n;
  for (int step = 16; step > 0; step >>= 1) {
    const uint32_t upper = remaining >> step;
    if (upper != 0) {
      remaining = upper;
      result += step;
    }
  }
  assert(remaining == 1);
  return result;
#endif
}
|
49
|
+
|
50
|
+
// Returns floor(log2(n)) without a special case for zero.
// As the name says, this is meant for n != 0. The portable branch returns 0
// for n == 0; the builtin branch hands n straight to __builtin_clz
// (NOTE(review): GCC documents the result of __builtin_clz(0) as undefined).
static inline int Log2FloorNonZero(uint32_t n) {
#ifdef __GNUC__
  const int leading_zeros = __builtin_clz(n);
  return 31 ^ leading_zeros;
#else
  // Count how many times n can be halved before it reaches zero.
  unsigned int shifts = 0;
  for (n >>= 1; n != 0; n >>= 1) {
    ++shifts;
  }
  return shifts;
#endif
}
|
59
|
+
|
60
|
+
// Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0.
|
61
|
+
inline int Log2Ceiling(uint32_t n) {
|
62
|
+
int floor = Log2Floor(n);
|
63
|
+
if (n == (n &~ (n - 1))) // zero or a power of two
|
64
|
+
return floor;
|
65
|
+
else
|
66
|
+
return floor + 1;
|
67
|
+
}
|
68
|
+
|
69
|
+
// A lookup table for small values of log2(int) to be used in entropy
|
70
|
+
// computation.
|
71
|
+
//
|
72
|
+
// ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]])
|
73
|
+
static const float kLog2Table[] = {
|
74
|
+
0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
|
75
|
+
1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
|
76
|
+
2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
|
77
|
+
3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
|
78
|
+
3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
|
79
|
+
3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
|
80
|
+
4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
|
81
|
+
4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
|
82
|
+
4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
|
83
|
+
4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
|
84
|
+
4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
|
85
|
+
5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
|
86
|
+
5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
|
87
|
+
5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
|
88
|
+
5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
|
89
|
+
5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
|
90
|
+
5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
|
91
|
+
5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
|
92
|
+
5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
|
93
|
+
5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
|
94
|
+
5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
|
95
|
+
5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
|
96
|
+
6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
|
97
|
+
6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
|
98
|
+
6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
|
99
|
+
6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
|
100
|
+
6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
|
101
|
+
6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
|
102
|
+
6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
|
103
|
+
6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
|
104
|
+
6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
|
105
|
+
6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
|
106
|
+
6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
|
107
|
+
6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
|
108
|
+
6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
|
109
|
+
6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
|
110
|
+
6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
|
111
|
+
6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
|
112
|
+
6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
|
113
|
+
6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
|
114
|
+
6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
|
115
|
+
6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
|
116
|
+
6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
|
117
|
+
7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
|
118
|
+
7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
|
119
|
+
7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
|
120
|
+
7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
|
121
|
+
7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
|
122
|
+
7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
|
123
|
+
7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
|
124
|
+
7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
|
125
|
+
7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
|
126
|
+
7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
|
127
|
+
7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
|
128
|
+
7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
|
129
|
+
7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
|
130
|
+
7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
|
131
|
+
7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
|
132
|
+
7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
|
133
|
+
7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
|
134
|
+
7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
|
135
|
+
7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
|
136
|
+
7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
|
137
|
+
7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
|
138
|
+
7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
|
139
|
+
7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
|
140
|
+
7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
|
141
|
+
7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
|
142
|
+
7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
|
143
|
+
7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
|
144
|
+
7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
|
145
|
+
7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
|
146
|
+
7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
|
147
|
+
7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
|
148
|
+
7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
|
149
|
+
7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
|
150
|
+
7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
|
151
|
+
7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
|
152
|
+
7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
|
153
|
+
7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
|
154
|
+
7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
|
155
|
+
7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
|
156
|
+
7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
|
157
|
+
7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
|
158
|
+
7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
|
159
|
+
7.9943534368588578f
|
160
|
+
};
|
161
|
+
|
162
|
+
// Faster logarithm for small integers, with the property of log2(0) == 0.
|
163
|
+
static inline double FastLog2(int v) {
|
164
|
+
if (v < (int)(sizeof(kLog2Table) / sizeof(kLog2Table[0]))) {
|
165
|
+
return kLog2Table[v];
|
166
|
+
}
|
167
|
+
#if defined(_MSC_VER) && _MSC_VER <= 1600
|
168
|
+
// Visual Studio 2010 does not have the log2() function defined, so we use
|
169
|
+
// log() and a multiplication instead.
|
170
|
+
static const double kLog2Inv = 1.4426950408889634f;
|
171
|
+
return log(static_cast<double>(v)) * kLog2Inv;
|
172
|
+
#else
|
173
|
+
return log2(static_cast<double>(v));
|
174
|
+
#endif
|
175
|
+
}
|
176
|
+
|
177
|
+
} // namespace brotli
|
178
|
+
|
179
|
+
#endif // BROTLI_ENC_FAST_LOG_H_
|
@@ -0,0 +1,87 @@
|
|
1
|
+
// Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Function to find maximal matching prefixes of strings.
|
16
|
+
|
17
|
+
#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
|
18
|
+
#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
|
19
|
+
|
20
|
+
#include <stdint.h>
|
21
|
+
|
22
|
+
#include <stddef.h>
|
23
|
+
|
24
|
+
#include "./port.h"
|
25
|
+
|
26
|
+
namespace brotli {
|
27
|
+
|
28
|
+
// Separate implementation for little-endian 64-bit targets, for speed.
|
29
|
+
#if defined(__GNUC__) && defined(_LP64) && defined(IS_LITTLE_ENDIAN)
|
30
|
+
|
31
|
+
static inline int FindMatchLengthWithLimit(const uint8_t* s1,
|
32
|
+
const uint8_t* s2,
|
33
|
+
size_t limit) {
|
34
|
+
int matched = 0;
|
35
|
+
size_t limit2 = (limit >> 3) + 1; // + 1 is for pre-decrement in while
|
36
|
+
while (PREDICT_TRUE(--limit2)) {
|
37
|
+
if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
|
38
|
+
BROTLI_UNALIGNED_LOAD64(s1 + matched))) {
|
39
|
+
s2 += 8;
|
40
|
+
matched += 8;
|
41
|
+
} else {
|
42
|
+
uint64_t x =
|
43
|
+
BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
|
44
|
+
int matching_bits = __builtin_ctzll(x);
|
45
|
+
matched += matching_bits >> 3;
|
46
|
+
return matched;
|
47
|
+
}
|
48
|
+
}
|
49
|
+
limit = (limit & 7) + 1; // + 1 is for pre-decrement in while
|
50
|
+
while (--limit) {
|
51
|
+
if (PREDICT_TRUE(s1[matched] == *s2)) {
|
52
|
+
++s2;
|
53
|
+
++matched;
|
54
|
+
} else {
|
55
|
+
return matched;
|
56
|
+
}
|
57
|
+
}
|
58
|
+
return matched;
|
59
|
+
}
|
60
|
+
#else
|
61
|
+
static inline int FindMatchLengthWithLimit(const uint8_t* s1,
|
62
|
+
const uint8_t* s2,
|
63
|
+
size_t limit) {
|
64
|
+
int matched = 0;
|
65
|
+
const uint8_t* s2_limit = s2 + limit;
|
66
|
+
const uint8_t* s2_ptr = s2;
|
67
|
+
// Find out how long the match is. We loop over the data 32 bits at a
|
68
|
+
// time until we find a 32-bit block that doesn't match; then we find
|
69
|
+
// the first non-matching bit and use that to calculate the total
|
70
|
+
// length of the match.
|
71
|
+
while (s2_ptr <= s2_limit - 4 &&
|
72
|
+
BROTLI_UNALIGNED_LOAD32(s2_ptr) ==
|
73
|
+
BROTLI_UNALIGNED_LOAD32(s1 + matched)) {
|
74
|
+
s2_ptr += 4;
|
75
|
+
matched += 4;
|
76
|
+
}
|
77
|
+
while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
|
78
|
+
++s2_ptr;
|
79
|
+
++matched;
|
80
|
+
}
|
81
|
+
return matched;
|
82
|
+
}
|
83
|
+
#endif
|
84
|
+
|
85
|
+
} // namespace brotli
|
86
|
+
|
87
|
+
#endif // BROTLI_ENC_FIND_MATCH_LENGTH_H_
|
@@ -0,0 +1,686 @@
|
|
1
|
+
// Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// A (forgetful) hash table to the data seen by the compressor, to
|
16
|
+
// help create backward references to previous data.
|
17
|
+
|
18
|
+
#ifndef BROTLI_ENC_HASH_H_
|
19
|
+
#define BROTLI_ENC_HASH_H_
|
20
|
+
|
21
|
+
#include <stddef.h>
|
22
|
+
#include <stdint.h>
|
23
|
+
#include <string.h>
|
24
|
+
#include <sys/types.h>
|
25
|
+
#include <algorithm>
|
26
|
+
#include <cstdlib>
|
27
|
+
#include <memory>
|
28
|
+
#include <string>
|
29
|
+
|
30
|
+
#include "./dictionary_hash.h"
|
31
|
+
#include "./fast_log.h"
|
32
|
+
#include "./find_match_length.h"
|
33
|
+
#include "./port.h"
|
34
|
+
#include "./prefix.h"
|
35
|
+
#include "./static_dict.h"
|
36
|
+
#include "./transform.h"
|
37
|
+
|
38
|
+
namespace brotli {
|
39
|
+
|
40
|
+
static const int kDistanceCacheIndex[] = {
|
41
|
+
0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
|
42
|
+
};
|
43
|
+
static const int kDistanceCacheOffset[] = {
|
44
|
+
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
|
45
|
+
};
|
46
|
+
|
47
|
+
static const int kCutoffTransformsCount = 10;
|
48
|
+
static const int kCutoffTransforms[] = {0, 12, 27, 23, 42, 63, 56, 48, 59, 64};
|
49
|
+
|
50
|
+
// kHashMul32 multiplier has these properties:
|
51
|
+
// * The multiplier must be odd. Otherwise we may lose the highest bit.
|
52
|
+
// * No long streaks of 1s or 0s.
|
53
|
+
// * There is no effort to ensure that it is a prime, the oddity is enough
|
54
|
+
// for this use.
|
55
|
+
// * The number has been tuned heuristically against compression benchmarks.
|
56
|
+
static const uint32_t kHashMul32 = 0x1e35a7bd;
|
57
|
+
|
58
|
+
template<int kShiftBits>
|
59
|
+
inline uint32_t Hash(const uint8_t *data) {
|
60
|
+
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
|
61
|
+
// The higher bits contain more mixture from the multiplication,
|
62
|
+
// so we take our results from there.
|
63
|
+
return h >> (32 - kShiftBits);
|
64
|
+
}
|
65
|
+
|
66
|
+
// Usually, we always choose the longest backward reference. This function
|
67
|
+
// allows for the exception of that rule.
|
68
|
+
//
|
69
|
+
// If we choose a backward reference that is further away, it will
|
70
|
+
// usually be coded with more bits. We approximate this by assuming
|
71
|
+
// log2(distance). If the distance can be expressed in terms of the
|
72
|
+
// last four distances, we use some heuristic constants to estimate
|
73
|
+
// the bits cost. For the first up to four literals we use the bit
|
74
|
+
// cost of the literals from the literal cost model, after that we
|
75
|
+
// use the average bit cost of the cost model.
|
76
|
+
//
|
77
|
+
// This function is used to sometimes discard a longer backward reference
|
78
|
+
// when it is not much longer and the bit cost for encoding it is more
|
79
|
+
// than the saved literals.
|
80
|
+
inline double BackwardReferenceScore(int copy_length,
|
81
|
+
int backward_reference_offset) {
|
82
|
+
return 5.4 * copy_length - 1.20 * Log2Floor(backward_reference_offset);
|
83
|
+
}
|
84
|
+
|
85
|
+
// Scores a backward reference whose distance is expressed through one of the
// 16 distance short codes. distance_short_code indexes a 16-entry cost table
// and is not range-checked here.
inline double BackwardReferenceScoreUsingLastDistance(int copy_length,
                                                      int distance_short_code) {
  // Heuristic cost, in bits, charged for each distance short code.
  static const double kShortCodeCost[16] = {
    -0.6, 0.95, 1.17, 1.27,
    0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
    1.05, 1.05, 1.15, 1.15, 1.25, 1.25
  };
  const double code_cost = kShortCodeCost[distance_short_code];
  return 5.4 * copy_length - code_cost;
}
|
94
|
+
|
95
|
+
// A candidate backward match: its distance plus a packed length field.
// length_and_code keeps the copy length in the bits above the low five and
// the length code in the low five bits; a stored code of zero means "the
// length code equals the length".
struct BackwardMatch {
  BackwardMatch() : distance(0), length_and_code(0) {}

  // Match whose length code is the same as its length.
  BackwardMatch(int dist, int len)
      : distance(dist), length_and_code(len << 5) {}

  // Match with an explicit length code; the code is stored only when it
  // differs from the length.
  BackwardMatch(int dist, int len, int len_code)
      : distance(dist),
        length_and_code((len << 5) | (len_code != len ? len_code : 0)) {}

  // Copy length of the match.
  int length() const {
    return length_and_code >> 5;
  }

  // Length code of the match: the stored code, or the length itself when no
  // separate code was stored.
  int length_code() const {
    const int stored_code = length_and_code & 31;
    if (stored_code != 0) {
      return stored_code;
    }
    return length();
  }

  int distance;
  int length_and_code;
};
|
116
|
+
|
117
|
+
// A (forgetful) hash table to the data seen by the compressor, to
|
118
|
+
// help create backward references to previous data.
|
119
|
+
//
|
120
|
+
// This is a hash map of fixed size (kBucketSize). Starting from the
|
121
|
+
// given index, kBucketSweep buckets are used to store values of a key.
|
122
|
+
template <int kBucketBits, int kBucketSweep, bool kUseDictionary>
|
123
|
+
class HashLongestMatchQuickly {
|
124
|
+
public:
|
125
|
+
HashLongestMatchQuickly() {
|
126
|
+
Reset();
|
127
|
+
}
|
128
|
+
void Reset() {
|
129
|
+
// It is not strictly necessary to fill this buffer here, but
|
130
|
+
// not filling will make the results of the compression stochastic
|
131
|
+
// (but correct). This is because random data would cause the
|
132
|
+
// system to find accidentally good backward references here and there.
|
133
|
+
memset(&buckets_[0], 0, sizeof(buckets_));
|
134
|
+
num_dict_lookups_ = 0;
|
135
|
+
num_dict_matches_ = 0;
|
136
|
+
}
|
137
|
+
// Look at 4 bytes at data.
|
138
|
+
// Compute a hash from these, and store the value somewhere within
|
139
|
+
// [ix .. ix+3].
|
140
|
+
inline void Store(const uint8_t *data, const int ix) {
|
141
|
+
const uint32_t key = HashBytes(data);
|
142
|
+
// Wiggle the value with the bucket sweep range.
|
143
|
+
const uint32_t off = (static_cast<uint32_t>(ix) >> 3) % kBucketSweep;
|
144
|
+
buckets_[key + off] = ix;
|
145
|
+
}
|
146
|
+
|
147
|
+
// Store hashes for a range of data.
|
148
|
+
void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
|
149
|
+
for (int p = 0; p < len; ++p) {
|
150
|
+
Store(&data[p & mask], startix + p);
|
151
|
+
}
|
152
|
+
}
|
153
|
+
|
154
|
+
// Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
|
155
|
+
// up to the length of max_length.
|
156
|
+
//
|
157
|
+
// Does not look for matches longer than max_length.
|
158
|
+
// Does not look for matches further away than max_backward.
|
159
|
+
// Writes the best found match length into best_len_out.
|
160
|
+
// Writes the index (&data[index]) of the start of the best match into
|
161
|
+
// best_distance_out.
|
162
|
+
inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer,
|
163
|
+
const size_t ring_buffer_mask,
|
164
|
+
const int* __restrict distance_cache,
|
165
|
+
const uint32_t cur_ix,
|
166
|
+
const uint32_t max_length,
|
167
|
+
const uint32_t max_backward,
|
168
|
+
int * __restrict best_len_out,
|
169
|
+
int * __restrict best_len_code_out,
|
170
|
+
int * __restrict best_distance_out,
|
171
|
+
double* __restrict best_score_out) {
|
172
|
+
const int best_len_in = *best_len_out;
|
173
|
+
const int cur_ix_masked = cur_ix & ring_buffer_mask;
|
174
|
+
int compare_char = ring_buffer[cur_ix_masked + best_len_in];
|
175
|
+
double best_score = *best_score_out;
|
176
|
+
int best_len = best_len_in;
|
177
|
+
int backward = distance_cache[0];
|
178
|
+
size_t prev_ix = cur_ix - backward;
|
179
|
+
bool match_found = false;
|
180
|
+
if (prev_ix < cur_ix) {
|
181
|
+
prev_ix &= ring_buffer_mask;
|
182
|
+
if (compare_char == ring_buffer[prev_ix + best_len]) {
|
183
|
+
int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
|
184
|
+
&ring_buffer[cur_ix_masked],
|
185
|
+
max_length);
|
186
|
+
if (len >= 4) {
|
187
|
+
best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
|
188
|
+
best_len = len;
|
189
|
+
*best_len_out = len;
|
190
|
+
*best_len_code_out = len;
|
191
|
+
*best_distance_out = backward;
|
192
|
+
*best_score_out = best_score;
|
193
|
+
compare_char = ring_buffer[cur_ix_masked + best_len];
|
194
|
+
if (kBucketSweep == 1) {
|
195
|
+
return true;
|
196
|
+
} else {
|
197
|
+
match_found = true;
|
198
|
+
}
|
199
|
+
}
|
200
|
+
}
|
201
|
+
}
|
202
|
+
const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]);
|
203
|
+
if (kBucketSweep == 1) {
|
204
|
+
// Only one to look for, don't bother to prepare for a loop.
|
205
|
+
prev_ix = buckets_[key];
|
206
|
+
backward = cur_ix - prev_ix;
|
207
|
+
prev_ix &= ring_buffer_mask;
|
208
|
+
if (compare_char != ring_buffer[prev_ix + best_len_in]) {
|
209
|
+
return false;
|
210
|
+
}
|
211
|
+
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
212
|
+
return false;
|
213
|
+
}
|
214
|
+
const int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
|
215
|
+
&ring_buffer[cur_ix_masked],
|
216
|
+
max_length);
|
217
|
+
if (len >= 4) {
|
218
|
+
*best_len_out = len;
|
219
|
+
*best_len_code_out = len;
|
220
|
+
*best_distance_out = backward;
|
221
|
+
*best_score_out = BackwardReferenceScore(len, backward);
|
222
|
+
return true;
|
223
|
+
}
|
224
|
+
} else {
|
225
|
+
uint32_t *bucket = buckets_ + key;
|
226
|
+
prev_ix = *bucket++;
|
227
|
+
for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {
|
228
|
+
const int backward = cur_ix - prev_ix;
|
229
|
+
prev_ix &= ring_buffer_mask;
|
230
|
+
if (compare_char != ring_buffer[prev_ix + best_len]) {
|
231
|
+
continue;
|
232
|
+
}
|
233
|
+
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
234
|
+
continue;
|
235
|
+
}
|
236
|
+
const int len =
|
237
|
+
FindMatchLengthWithLimit(&ring_buffer[prev_ix],
|
238
|
+
&ring_buffer[cur_ix_masked],
|
239
|
+
max_length);
|
240
|
+
if (len >= 4) {
|
241
|
+
const double score = BackwardReferenceScore(len, backward);
|
242
|
+
if (best_score < score) {
|
243
|
+
best_score = score;
|
244
|
+
best_len = len;
|
245
|
+
*best_len_out = best_len;
|
246
|
+
*best_len_code_out = best_len;
|
247
|
+
*best_distance_out = backward;
|
248
|
+
*best_score_out = score;
|
249
|
+
compare_char = ring_buffer[cur_ix_masked + best_len];
|
250
|
+
match_found = true;
|
251
|
+
}
|
252
|
+
}
|
253
|
+
}
|
254
|
+
}
|
255
|
+
if (kUseDictionary && !match_found &&
|
256
|
+
num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
|
257
|
+
++num_dict_lookups_;
|
258
|
+
const uint32_t key = Hash<14>(&ring_buffer[cur_ix_masked]) << 1;
|
259
|
+
const uint16_t v = kStaticDictionaryHash[key];
|
260
|
+
if (v > 0) {
|
261
|
+
const int len = v & 31;
|
262
|
+
const int dist = v >> 5;
|
263
|
+
const int offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
|
264
|
+
if (len <= max_length) {
|
265
|
+
const int matchlen =
|
266
|
+
FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
|
267
|
+
&kBrotliDictionary[offset], len);
|
268
|
+
if (matchlen > len - kCutoffTransformsCount && matchlen > 0) {
|
269
|
+
const int transform_id = kCutoffTransforms[len - matchlen];
|
270
|
+
const int word_id =
|
271
|
+
transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
|
272
|
+
dist;
|
273
|
+
const size_t backward = max_backward + word_id + 1;
|
274
|
+
const double score = BackwardReferenceScore(matchlen, backward);
|
275
|
+
if (best_score < score) {
|
276
|
+
++num_dict_matches_;
|
277
|
+
best_score = score;
|
278
|
+
best_len = matchlen;
|
279
|
+
*best_len_out = best_len;
|
280
|
+
*best_len_code_out = len;
|
281
|
+
*best_distance_out = backward;
|
282
|
+
*best_score_out = best_score;
|
283
|
+
return true;
|
284
|
+
}
|
285
|
+
}
|
286
|
+
}
|
287
|
+
}
|
288
|
+
}
|
289
|
+
return match_found;
|
290
|
+
}
|
291
|
+
|
292
|
+
enum { kHashLength = 5 };
|
293
|
+
enum { kHashTypeLength = 8 };
|
294
|
+
// HashBytes is the function that chooses the bucket to place
|
295
|
+
// the address in. The HashLongestMatch and HashLongestMatchQuickly
|
296
|
+
// classes have separate, different implementations of hashing.
|
297
|
+
static uint32_t HashBytes(const uint8_t *data) {
|
298
|
+
// Computing a hash based on 5 bytes works much better for
|
299
|
+
// qualities 1 and 3, where the next hash value is likely to replace
|
300
|
+
static const uint32_t kHashMul32 = 0x1e35a7bd;
|
301
|
+
uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32;
|
302
|
+
// The higher bits contain more mixture from the multiplication,
|
303
|
+
// so we take our results from there.
|
304
|
+
return h >> (64 - kBucketBits);
|
305
|
+
}
|
306
|
+
|
307
|
+
private:
|
308
|
+
static const uint32_t kBucketSize = 1 << kBucketBits;
|
309
|
+
uint32_t buckets_[kBucketSize + kBucketSweep];
|
310
|
+
size_t num_dict_lookups_;
|
311
|
+
size_t num_dict_matches_;
|
312
|
+
};
|
313
|
+
|
314
|
+
// The maximum length for which the zopflification uses distinct distances.
|
315
|
+
static const int kMaxZopfliLen = 325;
|
316
|
+
|
317
|
+
// A (forgetful) hash table to the data seen by the compressor, to
|
318
|
+
// help create backward references to previous data.
|
319
|
+
//
|
320
|
+
// This is a hash map of fixed size (kBucketSize) to a ring buffer of
|
321
|
+
// fixed size (kBlockSize). The ring buffer contains the last kBlockSize
|
322
|
+
// index positions of the given hash key in the compressed data.
|
323
|
+
template <int kBucketBits,
|
324
|
+
int kBlockBits,
|
325
|
+
int kNumLastDistancesToCheck>
|
326
|
+
class HashLongestMatch {
|
327
|
+
public:
|
328
|
+
// Constructs the hasher in its empty state; all initialisation is delegated
// to Reset().
HashLongestMatch() {
  Reset();
}
|
331
|
+
|
332
|
+
// Returns the hasher to its empty state: the static-dictionary statistics
// and every per-bucket entry counter are zeroed. buckets_ itself is not
// cleared here.
void Reset() {
  num_dict_matches_ = 0;
  num_dict_lookups_ = 0;
  memset(num_, 0, sizeof(num_));
}
|
337
|
+
|
338
|
+
// Look at 3 bytes at data.
|
339
|
+
// Compute a hash from these, and store the value of ix at that position.
|
340
|
+
inline void Store(const uint8_t *data, const int ix) {
|
341
|
+
const uint32_t key = HashBytes(data);
|
342
|
+
const int minor_ix = num_[key] & kBlockMask;
|
343
|
+
buckets_[key][minor_ix] = ix;
|
344
|
+
++num_[key];
|
345
|
+
}
|
346
|
+
|
347
|
+
// Store hashes for a range of data.
|
348
|
+
void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
|
349
|
+
for (int p = 0; p < len; ++p) {
|
350
|
+
Store(&data[p & mask], startix + p);
|
351
|
+
}
|
352
|
+
}
|
353
|
+
|
354
|
+
// Find a longest backward match of &data[cur_ix] up to the length of
|
355
|
+
// max_length.
|
356
|
+
//
|
357
|
+
// Does not look for matches longer than max_length.
|
358
|
+
// Does not look for matches further away than max_backward.
|
359
|
+
// Writes the best found match length into best_len_out.
|
360
|
+
// Writes the index (&data[index]) offset from the start of the best match
|
361
|
+
// into best_distance_out.
|
362
|
+
// Write the score of the best match into best_score_out.
|
363
|
+
bool FindLongestMatch(const uint8_t * __restrict data,
|
364
|
+
const size_t ring_buffer_mask,
|
365
|
+
const int* __restrict distance_cache,
|
366
|
+
const uint32_t cur_ix,
|
367
|
+
uint32_t max_length,
|
368
|
+
const uint32_t max_backward,
|
369
|
+
int * __restrict best_len_out,
|
370
|
+
int * __restrict best_len_code_out,
|
371
|
+
int * __restrict best_distance_out,
|
372
|
+
double * __restrict best_score_out) {
|
373
|
+
*best_len_code_out = 0;
|
374
|
+
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
375
|
+
bool match_found = false;
|
376
|
+
// Don't accept a short copy from far away.
|
377
|
+
double best_score = *best_score_out;
|
378
|
+
int best_len = *best_len_out;
|
379
|
+
*best_len_out = 0;
|
380
|
+
// Try last distance first.
|
381
|
+
for (int i = 0; i < kNumLastDistancesToCheck; ++i) {
|
382
|
+
const int idx = kDistanceCacheIndex[i];
|
383
|
+
const int backward = distance_cache[idx] + kDistanceCacheOffset[i];
|
384
|
+
size_t prev_ix = cur_ix - backward;
|
385
|
+
if (prev_ix >= cur_ix) {
|
386
|
+
continue;
|
387
|
+
}
|
388
|
+
if (PREDICT_FALSE(backward > max_backward)) {
|
389
|
+
continue;
|
390
|
+
}
|
391
|
+
prev_ix &= ring_buffer_mask;
|
392
|
+
|
393
|
+
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
394
|
+
prev_ix + best_len > ring_buffer_mask ||
|
395
|
+
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
396
|
+
continue;
|
397
|
+
}
|
398
|
+
const size_t len =
|
399
|
+
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
400
|
+
max_length);
|
401
|
+
if (len >= 3 || (len == 2 && i < 2)) {
|
402
|
+
// Comparing for >= 2 does not change the semantics, but just saves for
|
403
|
+
// a few unnecessary binary logarithms in backward reference score,
|
404
|
+
// since we are not interested in such short matches.
|
405
|
+
double score = BackwardReferenceScoreUsingLastDistance(len, i);
|
406
|
+
if (best_score < score) {
|
407
|
+
best_score = score;
|
408
|
+
best_len = len;
|
409
|
+
*best_len_out = best_len;
|
410
|
+
*best_len_code_out = best_len;
|
411
|
+
*best_distance_out = backward;
|
412
|
+
*best_score_out = best_score;
|
413
|
+
match_found = true;
|
414
|
+
}
|
415
|
+
}
|
416
|
+
}
|
417
|
+
const uint32_t key = HashBytes(&data[cur_ix_masked]);
|
418
|
+
const int * __restrict const bucket = &buckets_[key][0];
|
419
|
+
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
|
420
|
+
for (int i = num_[key] - 1; i >= down; --i) {
|
421
|
+
int prev_ix = bucket[i & kBlockMask];
|
422
|
+
if (prev_ix >= 0) {
|
423
|
+
const size_t backward = cur_ix - prev_ix;
|
424
|
+
if (PREDICT_FALSE(backward > max_backward)) {
|
425
|
+
break;
|
426
|
+
}
|
427
|
+
prev_ix &= ring_buffer_mask;
|
428
|
+
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
429
|
+
prev_ix + best_len > ring_buffer_mask ||
|
430
|
+
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
431
|
+
continue;
|
432
|
+
}
|
433
|
+
const size_t len =
|
434
|
+
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
435
|
+
max_length);
|
436
|
+
if (len >= 4) {
|
437
|
+
// Comparing for >= 3 does not change the semantics, but just saves
|
438
|
+
// for a few unnecessary binary logarithms in backward reference
|
439
|
+
// score, since we are not interested in such short matches.
|
440
|
+
double score = BackwardReferenceScore(len, backward);
|
441
|
+
if (best_score < score) {
|
442
|
+
best_score = score;
|
443
|
+
best_len = len;
|
444
|
+
*best_len_out = best_len;
|
445
|
+
*best_len_code_out = best_len;
|
446
|
+
*best_distance_out = backward;
|
447
|
+
*best_score_out = best_score;
|
448
|
+
match_found = true;
|
449
|
+
}
|
450
|
+
}
|
451
|
+
}
|
452
|
+
}
|
453
|
+
if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
|
454
|
+
uint32_t key = Hash<14>(&data[cur_ix_masked]) << 1;
|
455
|
+
for (int k = 0; k < 2; ++k, ++key) {
|
456
|
+
++num_dict_lookups_;
|
457
|
+
const uint16_t v = kStaticDictionaryHash[key];
|
458
|
+
if (v > 0) {
|
459
|
+
const int len = v & 31;
|
460
|
+
const int dist = v >> 5;
|
461
|
+
const int offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
|
462
|
+
if (len <= max_length) {
|
463
|
+
const int matchlen =
|
464
|
+
FindMatchLengthWithLimit(&data[cur_ix_masked],
|
465
|
+
&kBrotliDictionary[offset], len);
|
466
|
+
if (matchlen > len - kCutoffTransformsCount && matchlen > 0) {
|
467
|
+
const int transform_id = kCutoffTransforms[len - matchlen];
|
468
|
+
const int word_id =
|
469
|
+
transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
|
470
|
+
dist;
|
471
|
+
const size_t backward = max_backward + word_id + 1;
|
472
|
+
double score = BackwardReferenceScore(matchlen, backward);
|
473
|
+
if (best_score < score) {
|
474
|
+
++num_dict_matches_;
|
475
|
+
best_score = score;
|
476
|
+
best_len = matchlen;
|
477
|
+
*best_len_out = best_len;
|
478
|
+
*best_len_code_out = len;
|
479
|
+
*best_distance_out = backward;
|
480
|
+
*best_score_out = best_score;
|
481
|
+
match_found = true;
|
482
|
+
}
|
483
|
+
}
|
484
|
+
}
|
485
|
+
}
|
486
|
+
}
|
487
|
+
}
|
488
|
+
return match_found;
|
489
|
+
}
|
490
|
+
|
491
|
+
// Similar to FindLongestMatch(), but finds all matches.
|
492
|
+
//
|
493
|
+
// Sets *num_matches to the number of matches found, and stores the found
|
494
|
+
// matches in matches[0] to matches[*num_matches - 1].
|
495
|
+
//
|
496
|
+
// If the longest match is longer than kMaxZopfliLen, returns only this
|
497
|
+
// longest match.
|
498
|
+
//
|
499
|
+
// Requires that at least kMaxZopfliLen space is available in matches.
|
500
|
+
void FindAllMatches(const uint8_t* data,
|
501
|
+
const size_t ring_buffer_mask,
|
502
|
+
const uint32_t cur_ix,
|
503
|
+
uint32_t max_length,
|
504
|
+
const uint32_t max_backward,
|
505
|
+
int* num_matches,
|
506
|
+
BackwardMatch* matches) const {
|
507
|
+
BackwardMatch* const orig_matches = matches;
|
508
|
+
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
509
|
+
int best_len = 1;
|
510
|
+
int stop = static_cast<int>(cur_ix) - 64;
|
511
|
+
if (stop < 0) { stop = 0; }
|
512
|
+
for (int i = cur_ix - 1; i > stop && best_len <= 2; --i) {
|
513
|
+
size_t prev_ix = i;
|
514
|
+
const size_t backward = cur_ix - prev_ix;
|
515
|
+
if (PREDICT_FALSE(backward > max_backward)) {
|
516
|
+
break;
|
517
|
+
}
|
518
|
+
prev_ix &= ring_buffer_mask;
|
519
|
+
if (data[cur_ix_masked] != data[prev_ix] ||
|
520
|
+
data[cur_ix_masked + 1] != data[prev_ix + 1]) {
|
521
|
+
continue;
|
522
|
+
}
|
523
|
+
const size_t len =
|
524
|
+
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
525
|
+
max_length);
|
526
|
+
if (len > best_len) {
|
527
|
+
best_len = len;
|
528
|
+
if (len > kMaxZopfliLen) {
|
529
|
+
matches = orig_matches;
|
530
|
+
}
|
531
|
+
*matches++ = BackwardMatch(backward, len);
|
532
|
+
}
|
533
|
+
}
|
534
|
+
const uint32_t key = HashBytes(&data[cur_ix_masked]);
|
535
|
+
const int * __restrict const bucket = &buckets_[key][0];
|
536
|
+
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
|
537
|
+
for (int i = num_[key] - 1; i >= down; --i) {
|
538
|
+
int prev_ix = bucket[i & kBlockMask];
|
539
|
+
if (prev_ix >= 0) {
|
540
|
+
const size_t backward = cur_ix - prev_ix;
|
541
|
+
if (PREDICT_FALSE(backward > max_backward)) {
|
542
|
+
break;
|
543
|
+
}
|
544
|
+
prev_ix &= ring_buffer_mask;
|
545
|
+
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
546
|
+
prev_ix + best_len > ring_buffer_mask ||
|
547
|
+
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
548
|
+
continue;
|
549
|
+
}
|
550
|
+
const size_t len =
|
551
|
+
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
552
|
+
max_length);
|
553
|
+
if (len > best_len) {
|
554
|
+
best_len = len;
|
555
|
+
if (len > kMaxZopfliLen) {
|
556
|
+
matches = orig_matches;
|
557
|
+
}
|
558
|
+
*matches++ = BackwardMatch(backward, len);
|
559
|
+
}
|
560
|
+
}
|
561
|
+
}
|
562
|
+
std::vector<int> dict_matches(kMaxDictionaryMatchLen + 1, kInvalidMatch);
|
563
|
+
int minlen = std::max<int>(4, best_len + 1);
|
564
|
+
if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
|
565
|
+
&dict_matches[0])) {
|
566
|
+
int maxlen = std::min<int>(kMaxDictionaryMatchLen, max_length);
|
567
|
+
for (int l = minlen; l <= maxlen; ++l) {
|
568
|
+
int dict_id = dict_matches[l];
|
569
|
+
if (dict_id < kInvalidMatch) {
|
570
|
+
*matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
|
571
|
+
dict_id & 31);
|
572
|
+
}
|
573
|
+
}
|
574
|
+
}
|
575
|
+
*num_matches += matches - orig_matches;
|
576
|
+
}
|
577
|
+
|
578
|
+
enum { kHashLength = 4 };
|
579
|
+
enum { kHashTypeLength = 4 };
|
580
|
+
|
581
|
+
// HashBytes is the function that chooses the bucket to place
|
582
|
+
// the address in. The HashLongestMatch and HashLongestMatchQuickly
|
583
|
+
// classes have separate, different implementations of hashing.
|
584
|
+
static uint32_t HashBytes(const uint8_t *data) {
|
585
|
+
// kHashMul32 multiplier has these properties:
|
586
|
+
// * The multiplier must be odd. Otherwise we may lose the highest bit.
|
587
|
+
// * No long streaks of 1s or 0s.
|
588
|
+
// * Is not unfortunate (see the unittest) for the English language.
|
589
|
+
// * There is no effort to ensure that it is a prime, the oddity is enough
|
590
|
+
// for this use.
|
591
|
+
// * The number has been tuned heuristically against compression benchmarks.
|
592
|
+
static const uint32_t kHashMul32 = 0x1e35a7bd;
|
593
|
+
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
|
594
|
+
// The higher bits contain more mixture from the multiplication,
|
595
|
+
// so we take our results from there.
|
596
|
+
return h >> (32 - kBucketBits);
|
597
|
+
}
|
598
|
+
|
599
|
+
private:
|
600
|
+
// Number of hash buckets.
|
601
|
+
static const uint32_t kBucketSize = 1 << kBucketBits;
|
602
|
+
|
603
|
+
// Only kBlockSize newest backward references are kept,
|
604
|
+
// and the older are forgotten.
|
605
|
+
static const uint32_t kBlockSize = 1 << kBlockBits;
|
606
|
+
|
607
|
+
// Mask for accessing entries in a block (in a ringbuffer manner).
|
608
|
+
static const uint32_t kBlockMask = (1 << kBlockBits) - 1;
|
609
|
+
|
610
|
+
// Number of entries in a particular bucket.
|
611
|
+
uint16_t num_[kBucketSize];
|
612
|
+
|
613
|
+
// Buckets containing kBlockSize of backward references.
|
614
|
+
int buckets_[kBucketSize][kBlockSize];
|
615
|
+
|
616
|
+
size_t num_dict_lookups_;
|
617
|
+
size_t num_dict_matches_;
|
618
|
+
};
|
619
|
+
|
620
|
+
struct Hashers {
|
621
|
+
// For kBucketSweep == 1, enabling the dictionary lookup makes compression
|
622
|
+
// a little faster (0.5% - 1%) and it compresses 0.15% better on small text
|
623
|
+
// and html inputs.
|
624
|
+
typedef HashLongestMatchQuickly<16, 1, true> H1;
|
625
|
+
typedef HashLongestMatchQuickly<16, 2, false> H2;
|
626
|
+
typedef HashLongestMatchQuickly<16, 4, false> H3;
|
627
|
+
typedef HashLongestMatchQuickly<17, 4, true> H4;
|
628
|
+
typedef HashLongestMatch<14, 4, 4> H5;
|
629
|
+
typedef HashLongestMatch<14, 5, 4> H6;
|
630
|
+
typedef HashLongestMatch<15, 6, 10> H7;
|
631
|
+
typedef HashLongestMatch<15, 7, 10> H8;
|
632
|
+
typedef HashLongestMatch<15, 8, 16> H9;
|
633
|
+
|
634
|
+
void Init(int type) {
|
635
|
+
switch (type) {
|
636
|
+
case 1: hash_h1.reset(new H1); break;
|
637
|
+
case 2: hash_h2.reset(new H2); break;
|
638
|
+
case 3: hash_h3.reset(new H3); break;
|
639
|
+
case 4: hash_h4.reset(new H4); break;
|
640
|
+
case 5: hash_h5.reset(new H5); break;
|
641
|
+
case 6: hash_h6.reset(new H6); break;
|
642
|
+
case 7: hash_h7.reset(new H7); break;
|
643
|
+
case 8: hash_h8.reset(new H8); break;
|
644
|
+
case 9: hash_h9.reset(new H9); break;
|
645
|
+
default: break;
|
646
|
+
}
|
647
|
+
}
|
648
|
+
|
649
|
+
// Feeds the first size bytes of dict into hasher so that later input can
// reference them.
//
// For every position i at which a full Hasher::kHashTypeLength-byte read
// still fits inside dict, the bytes starting at dict + i are hashed and
// position i is recorded. Inputs shorter than kHashTypeLength store nothing.
//
//   size   - number of valid bytes in dict.
//   dict   - the dictionary bytes.
//   hasher - receives one Store() call per eligible position.
template<typename Hasher>
void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {
  for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) {
    // Hash the bytes AT position i. Passing plain `dict` here would hash the
    // same leading bytes every time and file every index under one key.
    hasher->Store(&dict[i], static_cast<int>(i));
  }
}
|
655
|
+
|
656
|
+
// Custom LZ77 window.
|
657
|
+
void PrependCustomDictionary(
|
658
|
+
int type, const size_t size, const uint8_t* dict) {
|
659
|
+
switch (type) {
|
660
|
+
case 1: WarmupHash(size, dict, hash_h1.get()); break;
|
661
|
+
case 2: WarmupHash(size, dict, hash_h2.get()); break;
|
662
|
+
case 3: WarmupHash(size, dict, hash_h3.get()); break;
|
663
|
+
case 4: WarmupHash(size, dict, hash_h4.get()); break;
|
664
|
+
case 5: WarmupHash(size, dict, hash_h5.get()); break;
|
665
|
+
case 6: WarmupHash(size, dict, hash_h6.get()); break;
|
666
|
+
case 7: WarmupHash(size, dict, hash_h7.get()); break;
|
667
|
+
case 8: WarmupHash(size, dict, hash_h8.get()); break;
|
668
|
+
case 9: WarmupHash(size, dict, hash_h9.get()); break;
|
669
|
+
default: break;
|
670
|
+
}
|
671
|
+
}
|
672
|
+
|
673
|
+
std::unique_ptr<H1> hash_h1;
|
674
|
+
std::unique_ptr<H2> hash_h2;
|
675
|
+
std::unique_ptr<H3> hash_h3;
|
676
|
+
std::unique_ptr<H4> hash_h4;
|
677
|
+
std::unique_ptr<H5> hash_h5;
|
678
|
+
std::unique_ptr<H6> hash_h6;
|
679
|
+
std::unique_ptr<H7> hash_h7;
|
680
|
+
std::unique_ptr<H8> hash_h8;
|
681
|
+
std::unique_ptr<H9> hash_h9;
|
682
|
+
};
|
683
|
+
|
684
|
+
} // namespace brotli
|
685
|
+
|
686
|
+
#endif // BROTLI_ENC_HASH_H_
|