extbrotli 0.0.1.PROTOTYPE

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +28 -0
  3. data/README.md +67 -0
  4. data/Rakefile +158 -0
  5. data/contrib/brotli/LICENSE +202 -0
  6. data/contrib/brotli/README.md +18 -0
  7. data/contrib/brotli/dec/bit_reader.c +55 -0
  8. data/contrib/brotli/dec/bit_reader.h +256 -0
  9. data/contrib/brotli/dec/context.h +260 -0
  10. data/contrib/brotli/dec/decode.c +1573 -0
  11. data/contrib/brotli/dec/decode.h +160 -0
  12. data/contrib/brotli/dec/dictionary.h +9494 -0
  13. data/contrib/brotli/dec/huffman.c +325 -0
  14. data/contrib/brotli/dec/huffman.h +77 -0
  15. data/contrib/brotli/dec/port.h +148 -0
  16. data/contrib/brotli/dec/prefix.h +756 -0
  17. data/contrib/brotli/dec/state.c +149 -0
  18. data/contrib/brotli/dec/state.h +185 -0
  19. data/contrib/brotli/dec/streams.c +99 -0
  20. data/contrib/brotli/dec/streams.h +100 -0
  21. data/contrib/brotli/dec/transform.h +315 -0
  22. data/contrib/brotli/dec/types.h +36 -0
  23. data/contrib/brotli/enc/backward_references.cc +769 -0
  24. data/contrib/brotli/enc/backward_references.h +50 -0
  25. data/contrib/brotli/enc/bit_cost.h +147 -0
  26. data/contrib/brotli/enc/block_splitter.cc +418 -0
  27. data/contrib/brotli/enc/block_splitter.h +78 -0
  28. data/contrib/brotli/enc/brotli_bit_stream.cc +884 -0
  29. data/contrib/brotli/enc/brotli_bit_stream.h +149 -0
  30. data/contrib/brotli/enc/cluster.h +290 -0
  31. data/contrib/brotli/enc/command.h +140 -0
  32. data/contrib/brotli/enc/context.h +185 -0
  33. data/contrib/brotli/enc/dictionary.h +9485 -0
  34. data/contrib/brotli/enc/dictionary_hash.h +4125 -0
  35. data/contrib/brotli/enc/encode.cc +715 -0
  36. data/contrib/brotli/enc/encode.h +196 -0
  37. data/contrib/brotli/enc/encode_parallel.cc +354 -0
  38. data/contrib/brotli/enc/encode_parallel.h +37 -0
  39. data/contrib/brotli/enc/entropy_encode.cc +492 -0
  40. data/contrib/brotli/enc/entropy_encode.h +88 -0
  41. data/contrib/brotli/enc/fast_log.h +179 -0
  42. data/contrib/brotli/enc/find_match_length.h +87 -0
  43. data/contrib/brotli/enc/hash.h +686 -0
  44. data/contrib/brotli/enc/histogram.cc +76 -0
  45. data/contrib/brotli/enc/histogram.h +100 -0
  46. data/contrib/brotli/enc/literal_cost.cc +172 -0
  47. data/contrib/brotli/enc/literal_cost.h +38 -0
  48. data/contrib/brotli/enc/metablock.cc +544 -0
  49. data/contrib/brotli/enc/metablock.h +88 -0
  50. data/contrib/brotli/enc/port.h +151 -0
  51. data/contrib/brotli/enc/prefix.h +85 -0
  52. data/contrib/brotli/enc/ringbuffer.h +108 -0
  53. data/contrib/brotli/enc/static_dict.cc +441 -0
  54. data/contrib/brotli/enc/static_dict.h +40 -0
  55. data/contrib/brotli/enc/static_dict_lut.h +12063 -0
  56. data/contrib/brotli/enc/streams.cc +127 -0
  57. data/contrib/brotli/enc/streams.h +129 -0
  58. data/contrib/brotli/enc/transform.h +250 -0
  59. data/contrib/brotli/enc/write_bits.h +91 -0
  60. data/ext/extbrotli.cc +24 -0
  61. data/ext/extbrotli.h +73 -0
  62. data/ext/extconf.rb +35 -0
  63. data/ext/lldecoder.c +220 -0
  64. data/ext/llencoder.cc +433 -0
  65. data/gemstub.rb +21 -0
  66. data/lib/extbrotli.rb +243 -0
  67. data/lib/extbrotli/version.rb +3 -0
  68. metadata +140 -0
@@ -0,0 +1,88 @@
1
+ // Copyright 2010 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Entropy encoding (Huffman) utilities.
16
+
17
+ #ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
18
+ #define BROTLI_ENC_ENTROPY_ENCODE_H_
19
+
20
+ #include <stdint.h>
21
+ #include <string.h>
22
+ #include <vector>
23
+ #include "./histogram.h"
24
+ #include "./prefix.h"
25
+
26
+ namespace brotli {
27
+
28
+ // This function will create a Huffman tree.
29
+ //
30
+ // The (data,length) contains the population counts.
31
+ // The tree_limit is the maximum bit depth of the Huffman codes.
32
+ //
33
+ // The depth contains the tree, i.e., how many bits are used for
34
+ // the symbol.
35
+ //
36
+ // See http://en.wikipedia.org/wiki/Huffman_coding
37
+ void CreateHuffmanTree(const int *data,
38
+ const int length,
39
+ const int tree_limit,
40
+ uint8_t *depth);
41
+
42
+ // Change the population counts in a way that the subsequent
43
+ // Huffman tree compression, especially its RLE part, will be more
44
+ // likely to compress this data efficiently.
45
+ //
46
+ // length contains the size of the histogram.
47
+ // counts contains the population counts.
48
+ int OptimizeHuffmanCountsForRle(int length, int* counts);
49
+
50
+ // Write a huffman tree from bit depths into the bitstream representation
51
+ // of a Huffman tree. The generated Huffman tree is to be compressed once
52
+ // more using a Huffman tree
53
+ void WriteHuffmanTree(const uint8_t* depth,
54
+ uint32_t num,
55
+ std::vector<uint8_t> *tree,
56
+ std::vector<uint8_t> *extra_bits_data);
57
+
58
+ // Get the actual bit values for a tree of bit depths.
59
+ void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits);
60
+
61
// Plain aggregate describing an entropy code over an alphabet of kSize
// symbols. All members are public and indexed by symbol value.
template <int kSize>
struct EntropyCode {
  uint8_t depth_[kSize];   // Number of bits used for each symbol.
  uint16_t bits_[kSize];   // The actual bit pattern representing each symbol.
  int count_;              // Number of symbols with a non-zero depth.
  int symbols_[4];         // The first four symbols with a non-zero depth.
};
72
+
73
+ static const int kCodeLengthCodes = 18;
74
+
75
+ // Literal entropy code.
76
+ typedef EntropyCode<256> EntropyCodeLiteral;
77
+ // Prefix entropy codes.
78
+ typedef EntropyCode<kNumCommandPrefixes> EntropyCodeCommand;
79
+ typedef EntropyCode<kNumDistancePrefixes> EntropyCodeDistance;
80
+ typedef EntropyCode<kNumBlockLenPrefixes> EntropyCodeBlockLength;
81
+ // Context map entropy code, 256 Huffman tree indexes + 16 run length codes.
82
+ typedef EntropyCode<272> EntropyCodeContextMap;
83
+ // Block type entropy code, 256 block types + 2 special symbols.
84
+ typedef EntropyCode<258> EntropyCodeBlockType;
85
+
86
+ } // namespace brotli
87
+
88
+ #endif // BROTLI_ENC_ENTROPY_ENCODE_H_
@@ -0,0 +1,179 @@
1
+ // Copyright 2013 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Utilities for fast computation of logarithms.
16
+
17
+ #ifndef BROTLI_ENC_FAST_LOG_H_
18
+ #define BROTLI_ENC_FAST_LOG_H_
19
+
20
+ #include <assert.h>
21
+ #include <math.h>
22
+ #include <stdint.h>
23
+
24
+ namespace brotli {
25
+
26
// Computes floor(log2(n)), i.e. the position of the highest set bit of n.
// By convention the result is -1 when n == 0.
inline int Log2Floor(uint32_t n) {
  if (n == 0) {
    return -1;
  }
#if defined(__clang__) ||                                       \
    (defined(__GNUC__) &&                                       \
     ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4))
  // For a 32-bit value, 31 - clz(n) == 31 ^ clz(n) since clz(n) <= 31.
  return 31 ^ __builtin_clz(n);
#else
  // Portable binary search: try shifts of 16, 8, 4, 2 and 1 bits and keep
  // every shift that still leaves a non-zero value.
  int result = 0;
  uint32_t rest = n;
  for (int step = 16; step > 0; step >>= 1) {
    const uint32_t shifted = rest >> step;
    if (shifted != 0) {
      rest = shifted;
      result += step;
    }
  }
  assert(rest == 1);
  return result;
#endif
}
49
+
50
// Computes floor(log2(n)) without a dedicated zero check.
// NOTE: callers are expected to pass n != 0. On the __GNUC__ path the result
// of __builtin_clz(0) is undefined; the portable path yields 0 for n == 0.
static inline int Log2FloorNonZero(uint32_t n) {
#ifdef __GNUC__
  return 31 ^ __builtin_clz(n);
#else
  // Count how many times n can be halved before it reaches zero.
  int bits = 0;
  for (n >>= 1; n != 0; n >>= 1) {
    ++bits;
  }
  return bits;
#endif
}
59
+
60
+ // Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0.
61
+ inline int Log2Ceiling(uint32_t n) {
62
+ int floor = Log2Floor(n);
63
+ if (n == (n &~ (n - 1))) // zero or a power of two
64
+ return floor;
65
+ else
66
+ return floor + 1;
67
+ }
68
+
69
+ // A lookup table for small values of log2(int) to be used in entropy
70
+ // computation.
71
+ //
72
+ // ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]])
73
+ static const float kLog2Table[] = {
74
+ 0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
75
+ 1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
76
+ 2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
77
+ 3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
78
+ 3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
79
+ 3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
80
+ 4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
81
+ 4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
82
+ 4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
83
+ 4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
84
+ 4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
85
+ 5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
86
+ 5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
87
+ 5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
88
+ 5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
89
+ 5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
90
+ 5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
91
+ 5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
92
+ 5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
93
+ 5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
94
+ 5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
95
+ 5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
96
+ 6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
97
+ 6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
98
+ 6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
99
+ 6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
100
+ 6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
101
+ 6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
102
+ 6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
103
+ 6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
104
+ 6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
105
+ 6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
106
+ 6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
107
+ 6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
108
+ 6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
109
+ 6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
110
+ 6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
111
+ 6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
112
+ 6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
113
+ 6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
114
+ 6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
115
+ 6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
116
+ 6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
117
+ 7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
118
+ 7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
119
+ 7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
120
+ 7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
121
+ 7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
122
+ 7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
123
+ 7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
124
+ 7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
125
+ 7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
126
+ 7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
127
+ 7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
128
+ 7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
129
+ 7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
130
+ 7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
131
+ 7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
132
+ 7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
133
+ 7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
134
+ 7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
135
+ 7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
136
+ 7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
137
+ 7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
138
+ 7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
139
+ 7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
140
+ 7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
141
+ 7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
142
+ 7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
143
+ 7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
144
+ 7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
145
+ 7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
146
+ 7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
147
+ 7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
148
+ 7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
149
+ 7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
150
+ 7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
151
+ 7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
152
+ 7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
153
+ 7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
154
+ 7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
155
+ 7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
156
+ 7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
157
+ 7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
158
+ 7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
159
+ 7.9943534368588578f
160
+ };
161
+
162
+ // Faster logarithm for small integers, with the property of log2(0) == 0.
163
+ static inline double FastLog2(int v) {
164
+ if (v < (int)(sizeof(kLog2Table) / sizeof(kLog2Table[0]))) {
165
+ return kLog2Table[v];
166
+ }
167
+ #if defined(_MSC_VER) && _MSC_VER <= 1600
168
+ // Visual Studio 2010 does not have the log2() function defined, so we use
169
+ // log() and a multiplication instead.
170
+ static const double kLog2Inv = 1.4426950408889634f;
171
+ return log(static_cast<double>(v)) * kLog2Inv;
172
+ #else
173
+ return log2(static_cast<double>(v));
174
+ #endif
175
+ }
176
+
177
+ } // namespace brotli
178
+
179
+ #endif // BROTLI_ENC_FAST_LOG_H_
@@ -0,0 +1,87 @@
1
+ // Copyright 2010 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Function to find maximal matching prefixes of strings.
16
+
17
+ #ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
18
+ #define BROTLI_ENC_FIND_MATCH_LENGTH_H_
19
+
20
+ #include <stdint.h>
21
+
22
+ #include <stddef.h>
23
+
24
+ #include "./port.h"
25
+
26
+ namespace brotli {
27
+
28
+ // Separate implementation for little-endian 64-bit targets, for speed.
29
+ #if defined(__GNUC__) && defined(_LP64) && defined(IS_LITTLE_ENDIAN)
30
+
31
+ static inline int FindMatchLengthWithLimit(const uint8_t* s1,
32
+ const uint8_t* s2,
33
+ size_t limit) {
34
+ int matched = 0;
35
+ size_t limit2 = (limit >> 3) + 1; // + 1 is for pre-decrement in while
36
+ while (PREDICT_TRUE(--limit2)) {
37
+ if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
38
+ BROTLI_UNALIGNED_LOAD64(s1 + matched))) {
39
+ s2 += 8;
40
+ matched += 8;
41
+ } else {
42
+ uint64_t x =
43
+ BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
44
+ int matching_bits = __builtin_ctzll(x);
45
+ matched += matching_bits >> 3;
46
+ return matched;
47
+ }
48
+ }
49
+ limit = (limit & 7) + 1; // + 1 is for pre-decrement in while
50
+ while (--limit) {
51
+ if (PREDICT_TRUE(s1[matched] == *s2)) {
52
+ ++s2;
53
+ ++matched;
54
+ } else {
55
+ return matched;
56
+ }
57
+ }
58
+ return matched;
59
+ }
60
+ #else
61
+ static inline int FindMatchLengthWithLimit(const uint8_t* s1,
62
+ const uint8_t* s2,
63
+ size_t limit) {
64
+ int matched = 0;
65
+ const uint8_t* s2_limit = s2 + limit;
66
+ const uint8_t* s2_ptr = s2;
67
+ // Find out how long the match is. We loop over the data 32 bits at a
68
+ // time until we find a 32-bit block that doesn't match; then we find
69
+ // the first non-matching bit and use that to calculate the total
70
+ // length of the match.
71
+ while (s2_ptr <= s2_limit - 4 &&
72
+ BROTLI_UNALIGNED_LOAD32(s2_ptr) ==
73
+ BROTLI_UNALIGNED_LOAD32(s1 + matched)) {
74
+ s2_ptr += 4;
75
+ matched += 4;
76
+ }
77
+ while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
78
+ ++s2_ptr;
79
+ ++matched;
80
+ }
81
+ return matched;
82
+ }
83
+ #endif
84
+
85
+ } // namespace brotli
86
+
87
+ #endif // BROTLI_ENC_FIND_MATCH_LENGTH_H_
@@ -0,0 +1,686 @@
1
+ // Copyright 2010 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // A (forgetful) hash table to the data seen by the compressor, to
16
+ // help create backward references to previous data.
17
+
18
+ #ifndef BROTLI_ENC_HASH_H_
19
+ #define BROTLI_ENC_HASH_H_
20
+
21
+ #include <stddef.h>
22
+ #include <stdint.h>
23
+ #include <string.h>
24
+ #include <sys/types.h>
25
+ #include <algorithm>
26
+ #include <cstdlib>
27
+ #include <memory>
28
+ #include <string>
29
+
30
+ #include "./dictionary_hash.h"
31
+ #include "./fast_log.h"
32
+ #include "./find_match_length.h"
33
+ #include "./port.h"
34
+ #include "./prefix.h"
35
+ #include "./static_dict.h"
36
+ #include "./transform.h"
37
+
38
+ namespace brotli {
39
+
40
+ static const int kDistanceCacheIndex[] = {
41
+ 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
42
+ };
43
+ static const int kDistanceCacheOffset[] = {
44
+ 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
45
+ };
46
+
47
+ static const int kCutoffTransformsCount = 10;
48
+ static const int kCutoffTransforms[] = {0, 12, 27, 23, 42, 63, 56, 48, 59, 64};
49
+
50
+ // kHashMul32 multiplier has these properties:
51
+ // * The multiplier must be odd. Otherwise we may lose the highest bit.
52
+ // * No long streaks of 1s or 0s.
53
+ // * There is no effort to ensure that it is a prime, the oddity is enough
54
+ // for this use.
55
+ // * The number has been tuned heuristically against compression benchmarks.
56
+ static const uint32_t kHashMul32 = 0x1e35a7bd;
57
+
58
+ template<int kShiftBits>
59
+ inline uint32_t Hash(const uint8_t *data) {
60
+ uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
61
+ // The higher bits contain more mixture from the multiplication,
62
+ // so we take our results from there.
63
+ return h >> (32 - kShiftBits);
64
+ }
65
+
66
+ // Usually, we always choose the longest backward reference. This function
67
+ // allows for the exception of that rule.
68
+ //
69
+ // If we choose a backward reference that is further away, it will
70
+ // usually be coded with more bits. We approximate this by assuming
71
+ // log2(distance). If the distance can be expressed in terms of the
72
+ // last four distances, we use some heuristic constants to estimate
73
+ // the bits cost. For the first up to four literals we use the bit
74
+ // cost of the literals from the literal cost model, after that we
75
+ // use the average bit cost of the cost model.
76
+ //
77
+ // This function is used to sometimes discard a longer backward reference
78
+ // when it is not much longer and the bit cost for encoding it is more
79
+ // than the saved literals.
80
+ inline double BackwardReferenceScore(int copy_length,
81
+ int backward_reference_offset) {
82
+ return 5.4 * copy_length - 1.20 * Log2Floor(backward_reference_offset);
83
+ }
84
+
85
// Scores a copy whose distance is expressed through a distance short code:
// 5.4 per copied byte, minus a fixed per-code cost looked up in a table.
// distance_short_code must be in [0, 15]; it is used as an index unchecked.
inline double BackwardReferenceScoreUsingLastDistance(int copy_length,
                                                      int distance_short_code) {
  static const double kDistanceShortCodeBitCost[16] = {
    -0.6, 0.95, 1.17, 1.27,
    0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
    1.05, 1.05, 1.15, 1.15, 1.25, 1.25
  };
  const double code_cost = kDistanceShortCodeBitCost[distance_short_code];
  return 5.4 * copy_length - code_cost;
}
94
+
95
// A backward match: a distance plus a packed (length, length code) pair.
// length_and_code holds the length in the bits above bit 4 and the length
// code in the low 5 bits; a stored code of 0 means "same as the length".
struct BackwardMatch {
  BackwardMatch() : distance(0), length_and_code(0) {}

  BackwardMatch(int dist, int len)
      : distance(dist), length_and_code(len << 5) {}

  BackwardMatch(int dist, int len, int len_code)
      : distance(dist), length_and_code(len << 5) {
    // Only record the code when it differs from the length.
    if (len != len_code) {
      length_and_code |= len_code;
    }
  }

  // The match length.
  int length() const { return length_and_code >> 5; }

  // The length code, which falls back to the length when none was stored.
  int length_code() const {
    const int stored = length_and_code & 31;
    if (stored != 0) {
      return stored;
    }
    return length();
  }

  int distance;
  int length_and_code;
};
116
+
117
+ // A (forgetful) hash table to the data seen by the compressor, to
118
+ // help create backward references to previous data.
119
+ //
120
+ // This is a hash map of fixed size (kBucketSize). Starting from the
121
+ // given index, kBucketSweep buckets are used to store values of a key.
122
+ template <int kBucketBits, int kBucketSweep, bool kUseDictionary>
123
+ class HashLongestMatchQuickly {
124
+ public:
125
+ HashLongestMatchQuickly() {
126
+ Reset();
127
+ }
128
+ void Reset() {
129
+ // It is not strictly necessary to fill this buffer here, but
130
+ // not filling will make the results of the compression stochastic
131
+ // (but correct). This is because random data would cause the
132
+ // system to find accidentally good backward references here and there.
133
+ memset(&buckets_[0], 0, sizeof(buckets_));
134
+ num_dict_lookups_ = 0;
135
+ num_dict_matches_ = 0;
136
+ }
137
+ // Look at 4 bytes at data.
138
+ // Compute a hash from these, and store the value somewhere within
139
+ // [ix .. ix+3].
140
+ inline void Store(const uint8_t *data, const int ix) {
141
+ const uint32_t key = HashBytes(data);
142
+ // Wiggle the value with the bucket sweep range.
143
+ const uint32_t off = (static_cast<uint32_t>(ix) >> 3) % kBucketSweep;
144
+ buckets_[key + off] = ix;
145
+ }
146
+
147
+ // Store hashes for a range of data.
148
+ void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
149
+ for (int p = 0; p < len; ++p) {
150
+ Store(&data[p & mask], startix + p);
151
+ }
152
+ }
153
+
154
+ // Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
155
+ // up to the length of max_length.
156
+ //
157
+ // Does not look for matches longer than max_length.
158
+ // Does not look for matches further away than max_backward.
159
+ // Writes the best found match length into best_len_out.
160
+ // Writes the index (&data[index]) of the start of the best match into
161
+ // best_distance_out.
162
+ inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer,
163
+ const size_t ring_buffer_mask,
164
+ const int* __restrict distance_cache,
165
+ const uint32_t cur_ix,
166
+ const uint32_t max_length,
167
+ const uint32_t max_backward,
168
+ int * __restrict best_len_out,
169
+ int * __restrict best_len_code_out,
170
+ int * __restrict best_distance_out,
171
+ double* __restrict best_score_out) {
172
+ const int best_len_in = *best_len_out;
173
+ const int cur_ix_masked = cur_ix & ring_buffer_mask;
174
+ int compare_char = ring_buffer[cur_ix_masked + best_len_in];
175
+ double best_score = *best_score_out;
176
+ int best_len = best_len_in;
177
+ int backward = distance_cache[0];
178
+ size_t prev_ix = cur_ix - backward;
179
+ bool match_found = false;
180
+ if (prev_ix < cur_ix) {
181
+ prev_ix &= ring_buffer_mask;
182
+ if (compare_char == ring_buffer[prev_ix + best_len]) {
183
+ int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
184
+ &ring_buffer[cur_ix_masked],
185
+ max_length);
186
+ if (len >= 4) {
187
+ best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
188
+ best_len = len;
189
+ *best_len_out = len;
190
+ *best_len_code_out = len;
191
+ *best_distance_out = backward;
192
+ *best_score_out = best_score;
193
+ compare_char = ring_buffer[cur_ix_masked + best_len];
194
+ if (kBucketSweep == 1) {
195
+ return true;
196
+ } else {
197
+ match_found = true;
198
+ }
199
+ }
200
+ }
201
+ }
202
+ const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]);
203
+ if (kBucketSweep == 1) {
204
+ // Only one to look for, don't bother to prepare for a loop.
205
+ prev_ix = buckets_[key];
206
+ backward = cur_ix - prev_ix;
207
+ prev_ix &= ring_buffer_mask;
208
+ if (compare_char != ring_buffer[prev_ix + best_len_in]) {
209
+ return false;
210
+ }
211
+ if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
212
+ return false;
213
+ }
214
+ const int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
215
+ &ring_buffer[cur_ix_masked],
216
+ max_length);
217
+ if (len >= 4) {
218
+ *best_len_out = len;
219
+ *best_len_code_out = len;
220
+ *best_distance_out = backward;
221
+ *best_score_out = BackwardReferenceScore(len, backward);
222
+ return true;
223
+ }
224
+ } else {
225
+ uint32_t *bucket = buckets_ + key;
226
+ prev_ix = *bucket++;
227
+ for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {
228
+ const int backward = cur_ix - prev_ix;
229
+ prev_ix &= ring_buffer_mask;
230
+ if (compare_char != ring_buffer[prev_ix + best_len]) {
231
+ continue;
232
+ }
233
+ if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
234
+ continue;
235
+ }
236
+ const int len =
237
+ FindMatchLengthWithLimit(&ring_buffer[prev_ix],
238
+ &ring_buffer[cur_ix_masked],
239
+ max_length);
240
+ if (len >= 4) {
241
+ const double score = BackwardReferenceScore(len, backward);
242
+ if (best_score < score) {
243
+ best_score = score;
244
+ best_len = len;
245
+ *best_len_out = best_len;
246
+ *best_len_code_out = best_len;
247
+ *best_distance_out = backward;
248
+ *best_score_out = score;
249
+ compare_char = ring_buffer[cur_ix_masked + best_len];
250
+ match_found = true;
251
+ }
252
+ }
253
+ }
254
+ }
255
+ if (kUseDictionary && !match_found &&
256
+ num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
257
+ ++num_dict_lookups_;
258
+ const uint32_t key = Hash<14>(&ring_buffer[cur_ix_masked]) << 1;
259
+ const uint16_t v = kStaticDictionaryHash[key];
260
+ if (v > 0) {
261
+ const int len = v & 31;
262
+ const int dist = v >> 5;
263
+ const int offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
264
+ if (len <= max_length) {
265
+ const int matchlen =
266
+ FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
267
+ &kBrotliDictionary[offset], len);
268
+ if (matchlen > len - kCutoffTransformsCount && matchlen > 0) {
269
+ const int transform_id = kCutoffTransforms[len - matchlen];
270
+ const int word_id =
271
+ transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
272
+ dist;
273
+ const size_t backward = max_backward + word_id + 1;
274
+ const double score = BackwardReferenceScore(matchlen, backward);
275
+ if (best_score < score) {
276
+ ++num_dict_matches_;
277
+ best_score = score;
278
+ best_len = matchlen;
279
+ *best_len_out = best_len;
280
+ *best_len_code_out = len;
281
+ *best_distance_out = backward;
282
+ *best_score_out = best_score;
283
+ return true;
284
+ }
285
+ }
286
+ }
287
+ }
288
+ }
289
+ return match_found;
290
+ }
291
+
292
+ enum { kHashLength = 5 };
293
+ enum { kHashTypeLength = 8 };
294
+ // HashBytes is the function that chooses the bucket to place
295
+ // the address in. The HashLongestMatch and HashLongestMatchQuickly
296
+ // classes have separate, different implementations of hashing.
297
+ static uint32_t HashBytes(const uint8_t *data) {
298
+ // Computing a hash based on 5 bytes works much better for
299
+ // qualities 1 and 3, where the next hash value is likely to replace
300
+ static const uint32_t kHashMul32 = 0x1e35a7bd;
301
+ uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32;
302
+ // The higher bits contain more mixture from the multiplication,
303
+ // so we take our results from there.
304
+ return h >> (64 - kBucketBits);
305
+ }
306
+
307
+ private:
308
+ static const uint32_t kBucketSize = 1 << kBucketBits;
309
+ uint32_t buckets_[kBucketSize + kBucketSweep];
310
+ size_t num_dict_lookups_;
311
+ size_t num_dict_matches_;
312
+ };
313
+
314
+ // The maximum length for which the zopflification uses distinct distances.
315
+ static const int kMaxZopfliLen = 325;
316
+
317
+ // A (forgetful) hash table to the data seen by the compressor, to
318
+ // help create backward references to previous data.
319
+ //
320
+ // This is a hash map of fixed size (kBucketSize) to a ring buffer of
321
+ // fixed size (kBlockSize). The ring buffer contains the last kBlockSize
322
+ // index positions of the given hash key in the compressed data.
323
+ template <int kBucketBits,
324
+ int kBlockBits,
325
+ int kNumLastDistancesToCheck>
326
+ class HashLongestMatch {
327
+ public:
328
+ HashLongestMatch() {
329
+ Reset();
330
+ }
331
+
332
+ void Reset() {
333
+ memset(&num_[0], 0, sizeof(num_));
334
+ num_dict_lookups_ = 0;
335
+ num_dict_matches_ = 0;
336
+ }
337
+
338
+ // Look at 3 bytes at data.
339
+ // Compute a hash from these, and store the value of ix at that position.
340
+ inline void Store(const uint8_t *data, const int ix) {
341
+ const uint32_t key = HashBytes(data);
342
+ const int minor_ix = num_[key] & kBlockMask;
343
+ buckets_[key][minor_ix] = ix;
344
+ ++num_[key];
345
+ }
346
+
347
+ // Store hashes for a range of data.
348
+ void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
349
+ for (int p = 0; p < len; ++p) {
350
+ Store(&data[p & mask], startix + p);
351
+ }
352
+ }
353
+
354
+ // Find a longest backward match of &data[cur_ix] up to the length of
355
+ // max_length.
356
+ //
357
+ // Does not look for matches longer than max_length.
358
+ // Does not look for matches further away than max_backward.
359
+ // Writes the best found match length into best_len_out.
360
+ // Writes the index (&data[index]) offset from the start of the best match
361
+ // into best_distance_out.
362
+ // Write the score of the best match into best_score_out.
363
+ bool FindLongestMatch(const uint8_t * __restrict data,
364
+ const size_t ring_buffer_mask,
365
+ const int* __restrict distance_cache,
366
+ const uint32_t cur_ix,
367
+ uint32_t max_length,
368
+ const uint32_t max_backward,
369
+ int * __restrict best_len_out,
370
+ int * __restrict best_len_code_out,
371
+ int * __restrict best_distance_out,
372
+ double * __restrict best_score_out) {
373
+ *best_len_code_out = 0;
374
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
375
+ bool match_found = false;
376
+ // Don't accept a short copy from far away.
377
+ double best_score = *best_score_out;
378
+ int best_len = *best_len_out;
379
+ *best_len_out = 0;
380
+ // Try last distance first.
381
+ for (int i = 0; i < kNumLastDistancesToCheck; ++i) {
382
+ const int idx = kDistanceCacheIndex[i];
383
+ const int backward = distance_cache[idx] + kDistanceCacheOffset[i];
384
+ size_t prev_ix = cur_ix - backward;
385
+ if (prev_ix >= cur_ix) {
386
+ continue;
387
+ }
388
+ if (PREDICT_FALSE(backward > max_backward)) {
389
+ continue;
390
+ }
391
+ prev_ix &= ring_buffer_mask;
392
+
393
+ if (cur_ix_masked + best_len > ring_buffer_mask ||
394
+ prev_ix + best_len > ring_buffer_mask ||
395
+ data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
396
+ continue;
397
+ }
398
+ const size_t len =
399
+ FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
400
+ max_length);
401
+ if (len >= 3 || (len == 2 && i < 2)) {
402
+ // Comparing for >= 2 does not change the semantics, but just saves for
403
+ // a few unnecessary binary logarithms in backward reference score,
404
+ // since we are not interested in such short matches.
405
+ double score = BackwardReferenceScoreUsingLastDistance(len, i);
406
+ if (best_score < score) {
407
+ best_score = score;
408
+ best_len = len;
409
+ *best_len_out = best_len;
410
+ *best_len_code_out = best_len;
411
+ *best_distance_out = backward;
412
+ *best_score_out = best_score;
413
+ match_found = true;
414
+ }
415
+ }
416
+ }
417
+ const uint32_t key = HashBytes(&data[cur_ix_masked]);
418
+ const int * __restrict const bucket = &buckets_[key][0];
419
+ const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
420
+ for (int i = num_[key] - 1; i >= down; --i) {
421
+ int prev_ix = bucket[i & kBlockMask];
422
+ if (prev_ix >= 0) {
423
+ const size_t backward = cur_ix - prev_ix;
424
+ if (PREDICT_FALSE(backward > max_backward)) {
425
+ break;
426
+ }
427
+ prev_ix &= ring_buffer_mask;
428
+ if (cur_ix_masked + best_len > ring_buffer_mask ||
429
+ prev_ix + best_len > ring_buffer_mask ||
430
+ data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
431
+ continue;
432
+ }
433
+ const size_t len =
434
+ FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
435
+ max_length);
436
+ if (len >= 4) {
437
+ // Comparing for >= 3 does not change the semantics, but just saves
438
+ // for a few unnecessary binary logarithms in backward reference
439
+ // score, since we are not interested in such short matches.
440
+ double score = BackwardReferenceScore(len, backward);
441
+ if (best_score < score) {
442
+ best_score = score;
443
+ best_len = len;
444
+ *best_len_out = best_len;
445
+ *best_len_code_out = best_len;
446
+ *best_distance_out = backward;
447
+ *best_score_out = best_score;
448
+ match_found = true;
449
+ }
450
+ }
451
+ }
452
+ }
453
+ if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
454
+ uint32_t key = Hash<14>(&data[cur_ix_masked]) << 1;
455
+ for (int k = 0; k < 2; ++k, ++key) {
456
+ ++num_dict_lookups_;
457
+ const uint16_t v = kStaticDictionaryHash[key];
458
+ if (v > 0) {
459
+ const int len = v & 31;
460
+ const int dist = v >> 5;
461
+ const int offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
462
+ if (len <= max_length) {
463
+ const int matchlen =
464
+ FindMatchLengthWithLimit(&data[cur_ix_masked],
465
+ &kBrotliDictionary[offset], len);
466
+ if (matchlen > len - kCutoffTransformsCount && matchlen > 0) {
467
+ const int transform_id = kCutoffTransforms[len - matchlen];
468
+ const int word_id =
469
+ transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
470
+ dist;
471
+ const size_t backward = max_backward + word_id + 1;
472
+ double score = BackwardReferenceScore(matchlen, backward);
473
+ if (best_score < score) {
474
+ ++num_dict_matches_;
475
+ best_score = score;
476
+ best_len = matchlen;
477
+ *best_len_out = best_len;
478
+ *best_len_code_out = len;
479
+ *best_distance_out = backward;
480
+ *best_score_out = best_score;
481
+ match_found = true;
482
+ }
483
+ }
484
+ }
485
+ }
486
+ }
487
+ }
488
+ return match_found;
489
+ }
490
+
491
+ // Similar to FindLongestMatch(), but finds all matches.
492
+ //
493
+ // Sets *num_matches to the number of matches found, and stores the found
494
+ // matches in matches[0] to matches[*num_matches - 1].
495
+ //
496
+ // If the longest match is longer than kMaxZopfliLen, returns only this
497
+ // longest match.
498
+ //
499
+ // Requires that at least kMaxZopfliLen space is available in matches.
500
+ void FindAllMatches(const uint8_t* data,
501
+ const size_t ring_buffer_mask,
502
+ const uint32_t cur_ix,
503
+ uint32_t max_length,
504
+ const uint32_t max_backward,
505
+ int* num_matches,
506
+ BackwardMatch* matches) const {
507
+ BackwardMatch* const orig_matches = matches;
508
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
509
+ int best_len = 1;
510
+ int stop = static_cast<int>(cur_ix) - 64;
511
+ if (stop < 0) { stop = 0; }
512
+ for (int i = cur_ix - 1; i > stop && best_len <= 2; --i) {
513
+ size_t prev_ix = i;
514
+ const size_t backward = cur_ix - prev_ix;
515
+ if (PREDICT_FALSE(backward > max_backward)) {
516
+ break;
517
+ }
518
+ prev_ix &= ring_buffer_mask;
519
+ if (data[cur_ix_masked] != data[prev_ix] ||
520
+ data[cur_ix_masked + 1] != data[prev_ix + 1]) {
521
+ continue;
522
+ }
523
+ const size_t len =
524
+ FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
525
+ max_length);
526
+ if (len > best_len) {
527
+ best_len = len;
528
+ if (len > kMaxZopfliLen) {
529
+ matches = orig_matches;
530
+ }
531
+ *matches++ = BackwardMatch(backward, len);
532
+ }
533
+ }
534
+ const uint32_t key = HashBytes(&data[cur_ix_masked]);
535
+ const int * __restrict const bucket = &buckets_[key][0];
536
+ const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
537
+ for (int i = num_[key] - 1; i >= down; --i) {
538
+ int prev_ix = bucket[i & kBlockMask];
539
+ if (prev_ix >= 0) {
540
+ const size_t backward = cur_ix - prev_ix;
541
+ if (PREDICT_FALSE(backward > max_backward)) {
542
+ break;
543
+ }
544
+ prev_ix &= ring_buffer_mask;
545
+ if (cur_ix_masked + best_len > ring_buffer_mask ||
546
+ prev_ix + best_len > ring_buffer_mask ||
547
+ data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
548
+ continue;
549
+ }
550
+ const size_t len =
551
+ FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
552
+ max_length);
553
+ if (len > best_len) {
554
+ best_len = len;
555
+ if (len > kMaxZopfliLen) {
556
+ matches = orig_matches;
557
+ }
558
+ *matches++ = BackwardMatch(backward, len);
559
+ }
560
+ }
561
+ }
562
+ std::vector<int> dict_matches(kMaxDictionaryMatchLen + 1, kInvalidMatch);
563
+ int minlen = std::max<int>(4, best_len + 1);
564
+ if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
565
+ &dict_matches[0])) {
566
+ int maxlen = std::min<int>(kMaxDictionaryMatchLen, max_length);
567
+ for (int l = minlen; l <= maxlen; ++l) {
568
+ int dict_id = dict_matches[l];
569
+ if (dict_id < kInvalidMatch) {
570
+ *matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
571
+ dict_id & 31);
572
+ }
573
+ }
574
+ }
575
+ *num_matches += matches - orig_matches;
576
+ }
577
+
578
+ enum { kHashLength = 4 };
579
+ enum { kHashTypeLength = 4 };
580
+
581
+ // HashBytes is the function that chooses the bucket to place
582
+ // the address in. The HashLongestMatch and HashLongestMatchQuickly
583
+ // classes have separate, different implementations of hashing.
584
+ static uint32_t HashBytes(const uint8_t *data) {
585
+ // kHashMul32 multiplier has these properties:
586
+ // * The multiplier must be odd. Otherwise we may lose the highest bit.
587
+ // * No long streaks of 1s or 0s.
588
+ // * Is not unfortunate (see the unittest) for the English language.
589
+ // * There is no effort to ensure that it is a prime, the oddity is enough
590
+ // for this use.
591
+ // * The number has been tuned heuristically against compression benchmarks.
592
+ static const uint32_t kHashMul32 = 0x1e35a7bd;
593
+ uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
594
+ // The higher bits contain more mixture from the multiplication,
595
+ // so we take our results from there.
596
+ return h >> (32 - kBucketBits);
597
+ }
598
+
599
+ private:
600
+ // Number of hash buckets.
601
+ static const uint32_t kBucketSize = 1 << kBucketBits;
602
+
603
+ // Only kBlockSize newest backward references are kept,
604
+ // and the older are forgotten.
605
+ static const uint32_t kBlockSize = 1 << kBlockBits;
606
+
607
+ // Mask for accessing entries in a block (in a ringbuffer manner).
608
+ static const uint32_t kBlockMask = (1 << kBlockBits) - 1;
609
+
610
+ // Number of entries in a particular bucket.
611
+ uint16_t num_[kBucketSize];
612
+
613
+ // Buckets containing kBlockSize of backward references.
614
+ int buckets_[kBucketSize][kBlockSize];
615
+
616
+ size_t num_dict_lookups_;
617
+ size_t num_dict_matches_;
618
+ };
619
+
620
+ struct Hashers {
621
+ // For kBucketSweep == 1, enabling the dictionary lookup makes compression
622
+ // a little faster (0.5% - 1%) and it compresses 0.15% better on small text
623
+ // and html inputs.
624
+ typedef HashLongestMatchQuickly<16, 1, true> H1;
625
+ typedef HashLongestMatchQuickly<16, 2, false> H2;
626
+ typedef HashLongestMatchQuickly<16, 4, false> H3;
627
+ typedef HashLongestMatchQuickly<17, 4, true> H4;
628
+ typedef HashLongestMatch<14, 4, 4> H5;
629
+ typedef HashLongestMatch<14, 5, 4> H6;
630
+ typedef HashLongestMatch<15, 6, 10> H7;
631
+ typedef HashLongestMatch<15, 7, 10> H8;
632
+ typedef HashLongestMatch<15, 8, 16> H9;
633
+
634
+ void Init(int type) {
635
+ switch (type) {
636
+ case 1: hash_h1.reset(new H1); break;
637
+ case 2: hash_h2.reset(new H2); break;
638
+ case 3: hash_h3.reset(new H3); break;
639
+ case 4: hash_h4.reset(new H4); break;
640
+ case 5: hash_h5.reset(new H5); break;
641
+ case 6: hash_h6.reset(new H6); break;
642
+ case 7: hash_h7.reset(new H7); break;
643
+ case 8: hash_h8.reset(new H8); break;
644
+ case 9: hash_h9.reset(new H9); break;
645
+ default: break;
646
+ }
647
+ }
648
+
649
+ template<typename Hasher>
650
+ void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {
651
+ for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) {
652
+ hasher->Store(dict, i);
653
+ }
654
+ }
655
+
656
+ // Custom LZ77 window.
657
+ void PrependCustomDictionary(
658
+ int type, const size_t size, const uint8_t* dict) {
659
+ switch (type) {
660
+ case 1: WarmupHash(size, dict, hash_h1.get()); break;
661
+ case 2: WarmupHash(size, dict, hash_h2.get()); break;
662
+ case 3: WarmupHash(size, dict, hash_h3.get()); break;
663
+ case 4: WarmupHash(size, dict, hash_h4.get()); break;
664
+ case 5: WarmupHash(size, dict, hash_h5.get()); break;
665
+ case 6: WarmupHash(size, dict, hash_h6.get()); break;
666
+ case 7: WarmupHash(size, dict, hash_h7.get()); break;
667
+ case 8: WarmupHash(size, dict, hash_h8.get()); break;
668
+ case 9: WarmupHash(size, dict, hash_h9.get()); break;
669
+ default: break;
670
+ }
671
+ }
672
+
673
+ std::unique_ptr<H1> hash_h1;
674
+ std::unique_ptr<H2> hash_h2;
675
+ std::unique_ptr<H3> hash_h3;
676
+ std::unique_ptr<H4> hash_h4;
677
+ std::unique_ptr<H5> hash_h5;
678
+ std::unique_ptr<H6> hash_h6;
679
+ std::unique_ptr<H7> hash_h7;
680
+ std::unique_ptr<H8> hash_h8;
681
+ std::unique_ptr<H9> hash_h9;
682
+ };
683
+
684
+ } // namespace brotli
685
+
686
+ #endif // BROTLI_ENC_HASH_H_