extbrotli 0.0.1.PROTOTYPE

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +28 -0
  3. data/README.md +67 -0
  4. data/Rakefile +158 -0
  5. data/contrib/brotli/LICENSE +202 -0
  6. data/contrib/brotli/README.md +18 -0
  7. data/contrib/brotli/dec/bit_reader.c +55 -0
  8. data/contrib/brotli/dec/bit_reader.h +256 -0
  9. data/contrib/brotli/dec/context.h +260 -0
  10. data/contrib/brotli/dec/decode.c +1573 -0
  11. data/contrib/brotli/dec/decode.h +160 -0
  12. data/contrib/brotli/dec/dictionary.h +9494 -0
  13. data/contrib/brotli/dec/huffman.c +325 -0
  14. data/contrib/brotli/dec/huffman.h +77 -0
  15. data/contrib/brotli/dec/port.h +148 -0
  16. data/contrib/brotli/dec/prefix.h +756 -0
  17. data/contrib/brotli/dec/state.c +149 -0
  18. data/contrib/brotli/dec/state.h +185 -0
  19. data/contrib/brotli/dec/streams.c +99 -0
  20. data/contrib/brotli/dec/streams.h +100 -0
  21. data/contrib/brotli/dec/transform.h +315 -0
  22. data/contrib/brotli/dec/types.h +36 -0
  23. data/contrib/brotli/enc/backward_references.cc +769 -0
  24. data/contrib/brotli/enc/backward_references.h +50 -0
  25. data/contrib/brotli/enc/bit_cost.h +147 -0
  26. data/contrib/brotli/enc/block_splitter.cc +418 -0
  27. data/contrib/brotli/enc/block_splitter.h +78 -0
  28. data/contrib/brotli/enc/brotli_bit_stream.cc +884 -0
  29. data/contrib/brotli/enc/brotli_bit_stream.h +149 -0
  30. data/contrib/brotli/enc/cluster.h +290 -0
  31. data/contrib/brotli/enc/command.h +140 -0
  32. data/contrib/brotli/enc/context.h +185 -0
  33. data/contrib/brotli/enc/dictionary.h +9485 -0
  34. data/contrib/brotli/enc/dictionary_hash.h +4125 -0
  35. data/contrib/brotli/enc/encode.cc +715 -0
  36. data/contrib/brotli/enc/encode.h +196 -0
  37. data/contrib/brotli/enc/encode_parallel.cc +354 -0
  38. data/contrib/brotli/enc/encode_parallel.h +37 -0
  39. data/contrib/brotli/enc/entropy_encode.cc +492 -0
  40. data/contrib/brotli/enc/entropy_encode.h +88 -0
  41. data/contrib/brotli/enc/fast_log.h +179 -0
  42. data/contrib/brotli/enc/find_match_length.h +87 -0
  43. data/contrib/brotli/enc/hash.h +686 -0
  44. data/contrib/brotli/enc/histogram.cc +76 -0
  45. data/contrib/brotli/enc/histogram.h +100 -0
  46. data/contrib/brotli/enc/literal_cost.cc +172 -0
  47. data/contrib/brotli/enc/literal_cost.h +38 -0
  48. data/contrib/brotli/enc/metablock.cc +544 -0
  49. data/contrib/brotli/enc/metablock.h +88 -0
  50. data/contrib/brotli/enc/port.h +151 -0
  51. data/contrib/brotli/enc/prefix.h +85 -0
  52. data/contrib/brotli/enc/ringbuffer.h +108 -0
  53. data/contrib/brotli/enc/static_dict.cc +441 -0
  54. data/contrib/brotli/enc/static_dict.h +40 -0
  55. data/contrib/brotli/enc/static_dict_lut.h +12063 -0
  56. data/contrib/brotli/enc/streams.cc +127 -0
  57. data/contrib/brotli/enc/streams.h +129 -0
  58. data/contrib/brotli/enc/transform.h +250 -0
  59. data/contrib/brotli/enc/write_bits.h +91 -0
  60. data/ext/extbrotli.cc +24 -0
  61. data/ext/extbrotli.h +73 -0
  62. data/ext/extconf.rb +35 -0
  63. data/ext/lldecoder.c +220 -0
  64. data/ext/llencoder.cc +433 -0
  65. data/gemstub.rb +21 -0
  66. data/lib/extbrotli.rb +243 -0
  67. data/lib/extbrotli/version.rb +3 -0
  68. metadata +140 -0
@@ -0,0 +1,88 @@
1
+ // Copyright 2010 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Entropy encoding (Huffman) utilities.
16
+
17
+ #ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
18
+ #define BROTLI_ENC_ENTROPY_ENCODE_H_
19
+
20
+ #include <stdint.h>
21
+ #include <string.h>
22
+ #include <vector>
23
+ #include "./histogram.h"
24
+ #include "./prefix.h"
25
+
26
+ namespace brotli {
27
+
28
+ // This function will create a Huffman tree.
29
+ //
30
+ // The (data,length) contains the population counts.
31
+ // The tree_limit is the maximum bit depth of the Huffman codes.
32
+ //
33
+ // The depth contains the tree, i.e., how many bits are used for
34
+ // the symbol.
35
+ //
36
+ // See http://en.wikipedia.org/wiki/Huffman_coding
37
+ void CreateHuffmanTree(const int *data,
38
+ const int length,
39
+ const int tree_limit,
40
+ uint8_t *depth);
41
+
42
+ // Change the population counts so that the subsequent Huffman tree
43
+ // compression, especially its RLE part, is more likely to compress
44
+ // this data efficiently.
45
+ //
46
+ // length contains the size of the histogram.
47
+ // counts contains the population counts.
48
+ int OptimizeHuffmanCountsForRle(int length, int* counts);
49
+
50
+ // Write a Huffman tree from bit depths into the bitstream representation
51
+ // of a Huffman tree. The generated Huffman tree is to be compressed once
52
+ // more using a Huffman tree.
53
+ void WriteHuffmanTree(const uint8_t* depth,
54
+ uint32_t num,
55
+ std::vector<uint8_t> *tree,
56
+ std::vector<uint8_t> *extra_bits_data);
57
+
58
+ // Get the actual bit values for a tree of bit depths.
59
+ void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits);
60
+
61
+ template<int kSize>  // kSize: number of entries in depth_ and bits_.
62
+ struct EntropyCode {
63
+ // Code length in bits of each symbol, indexed by symbol.
64
+ uint8_t depth_[kSize];
65
+ // Actual bit pattern used to represent each symbol, indexed by symbol.
66
+ uint16_t bits_[kSize];
67
+ // Number of symbols that have a non-zero depth.
68
+ int count_;
69
+ // The first four symbols that have a non-zero depth.
70
+ int symbols_[4];
71
+ };
72
+
73
+ static const int kCodeLengthCodes = 18;
74
+
75
+ // Literal entropy code.
76
+ typedef EntropyCode<256> EntropyCodeLiteral;
77
+ // Prefix entropy codes.
78
+ typedef EntropyCode<kNumCommandPrefixes> EntropyCodeCommand;
79
+ typedef EntropyCode<kNumDistancePrefixes> EntropyCodeDistance;
80
+ typedef EntropyCode<kNumBlockLenPrefixes> EntropyCodeBlockLength;
81
+ // Context map entropy code, 256 Huffman tree indexes + 16 run length codes.
82
+ typedef EntropyCode<272> EntropyCodeContextMap;
83
+ // Block type entropy code, 256 block types + 2 special symbols.
84
+ typedef EntropyCode<258> EntropyCodeBlockType;
85
+
86
+ } // namespace brotli
87
+
88
+ #endif // BROTLI_ENC_ENTROPY_ENCODE_H_
@@ -0,0 +1,179 @@
1
+ // Copyright 2013 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Utilities for fast computation of logarithms.
16
+
17
+ #ifndef BROTLI_ENC_FAST_LOG_H_
18
+ #define BROTLI_ENC_FAST_LOG_H_
19
+
20
+ #include <assert.h>
21
+ #include <math.h>
22
+ #include <stdint.h>
23
+
24
+ namespace brotli {
25
+
26
+ // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
27
// Computes floor(log2(n)), i.e. the index of the highest set bit of n.
// Returns -1 when n == 0.
inline int Log2Floor(uint32_t n) {
  if (n == 0) {
    return -1;
  }
#if defined(__clang__) || \
    (defined(__GNUC__) && \
     ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4))
  // For a 32-bit operand, XOR with 31 is the same as 31 - (leading zeros).
  return 31 ^ __builtin_clz(n);
#else
  // Portable fallback: binary search for the highest set bit by trying
  // shifts of 16, 8, 4, 2 and 1 and keeping every shift that leaves a
  // non-zero remainder.
  int result = 0;
  uint32_t rest = n;
  for (int step = 16; step > 0; step >>= 1) {
    const uint32_t upper = rest >> step;
    if (upper != 0) {
      rest = upper;
      result += step;
    }
  }
  assert(rest == 1);
  return result;
#endif
}
49
+
50
// Computes floor(log2(n)) for n != 0.
//
// Unlike Log2Floor there is no special case for zero: the caller must pass
// a non-zero value, because the builtin used on GCC-compatible compilers
// has an undefined result for 0.
static inline int Log2FloorNonZero(uint32_t n) {
#ifdef __GNUC__
  // For a 32-bit operand, XOR with 31 is the same as 31 - (leading zeros).
  return 31 ^ __builtin_clz(n);
#else
  // Count how many times n can be halved before it reaches zero.
  unsigned int halvings = 0;
  for (n >>= 1; n != 0; n >>= 1) {
    ++halvings;
  }
  return halvings;
#endif
}
59
+
60
+ // Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0.
61
+ inline int Log2Ceiling(uint32_t n) {
62
+ int floor = Log2Floor(n);
63
+ if (n == (n &~ (n - 1))) // zero or a power of two
64
+ return floor;
65
+ else
66
+ return floor + 1;
67
+ }
68
+
69
+ // A lookup table for small values of log2(int) to be used in entropy
70
+ // computation.
71
+ //
72
+ // ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]])
73
+ static const float kLog2Table[] = {
74
+ 0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
75
+ 1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
76
+ 2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
77
+ 3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
78
+ 3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
79
+ 3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
80
+ 4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
81
+ 4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
82
+ 4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
83
+ 4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
84
+ 4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
85
+ 5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
86
+ 5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
87
+ 5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
88
+ 5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
89
+ 5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
90
+ 5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
91
+ 5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
92
+ 5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
93
+ 5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
94
+ 5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
95
+ 5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
96
+ 6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
97
+ 6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
98
+ 6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
99
+ 6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
100
+ 6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
101
+ 6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
102
+ 6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
103
+ 6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
104
+ 6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
105
+ 6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
106
+ 6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
107
+ 6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
108
+ 6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
109
+ 6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
110
+ 6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
111
+ 6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
112
+ 6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
113
+ 6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
114
+ 6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
115
+ 6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
116
+ 6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
117
+ 7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
118
+ 7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
119
+ 7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
120
+ 7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
121
+ 7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
122
+ 7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
123
+ 7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
124
+ 7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
125
+ 7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
126
+ 7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
127
+ 7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
128
+ 7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
129
+ 7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
130
+ 7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
131
+ 7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
132
+ 7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
133
+ 7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
134
+ 7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
135
+ 7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
136
+ 7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
137
+ 7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
138
+ 7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
139
+ 7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
140
+ 7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
141
+ 7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
142
+ 7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
143
+ 7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
144
+ 7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
145
+ 7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
146
+ 7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
147
+ 7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
148
+ 7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
149
+ 7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
150
+ 7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
151
+ 7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
152
+ 7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
153
+ 7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
154
+ 7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
155
+ 7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
156
+ 7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
157
+ 7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
158
+ 7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
159
+ 7.9943534368588578f
160
+ };
161
+
162
+ // Faster logarithm for small integers, with the property of log2(0) == 0.
163
+ static inline double FastLog2(int v) {
164
+ if (v < (int)(sizeof(kLog2Table) / sizeof(kLog2Table[0]))) {
165
+ return kLog2Table[v];
166
+ }
167
+ #if defined(_MSC_VER) && _MSC_VER <= 1600
168
+ // Visual Studio 2010 does not have the log2() function defined, so we use
169
+ // log() and a multiplication instead.
170
+ static const double kLog2Inv = 1.4426950408889634f;
171
+ return log(static_cast<double>(v)) * kLog2Inv;
172
+ #else
173
+ return log2(static_cast<double>(v));
174
+ #endif
175
+ }
176
+
177
+ } // namespace brotli
178
+
179
+ #endif // BROTLI_ENC_FAST_LOG_H_
@@ -0,0 +1,87 @@
1
+ // Copyright 2010 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Function to find maximal matching prefixes of strings.
16
+
17
+ #ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
18
+ #define BROTLI_ENC_FIND_MATCH_LENGTH_H_
19
+
20
+ #include <stdint.h>
21
+
22
+ #include <stddef.h>
23
+
24
+ #include "./port.h"
25
+
26
+ namespace brotli {
27
+
28
+ // Separate implementation for little-endian 64-bit targets, for speed.
29
+ #if defined(__GNUC__) && defined(_LP64) && defined(IS_LITTLE_ENDIAN)
30
+
31
+ static inline int FindMatchLengthWithLimit(const uint8_t* s1,
32
+ const uint8_t* s2,
33
+ size_t limit) {
34
+ int matched = 0;
35
+ size_t limit2 = (limit >> 3) + 1; // + 1 is for pre-decrement in while
36
+ while (PREDICT_TRUE(--limit2)) {
37
+ if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
38
+ BROTLI_UNALIGNED_LOAD64(s1 + matched))) {
39
+ s2 += 8;
40
+ matched += 8;
41
+ } else {
42
+ uint64_t x =
43
+ BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
44
+ int matching_bits = __builtin_ctzll(x);
45
+ matched += matching_bits >> 3;
46
+ return matched;
47
+ }
48
+ }
49
+ limit = (limit & 7) + 1; // + 1 is for pre-decrement in while
50
+ while (--limit) {
51
+ if (PREDICT_TRUE(s1[matched] == *s2)) {
52
+ ++s2;
53
+ ++matched;
54
+ } else {
55
+ return matched;
56
+ }
57
+ }
58
+ return matched;
59
+ }
60
+ #else
61
+ static inline int FindMatchLengthWithLimit(const uint8_t* s1,
62
+ const uint8_t* s2,
63
+ size_t limit) {
64
+ int matched = 0;
65
+ const uint8_t* s2_limit = s2 + limit;
66
+ const uint8_t* s2_ptr = s2;
67
+ // Find out how long the match is. We loop over the data 32 bits at a
68
+ // time until we find a 32-bit block that doesn't match; then we find
69
+ // the first non-matching bit and use that to calculate the total
70
+ // length of the match.
71
+ while (s2_ptr <= s2_limit - 4 &&
72
+ BROTLI_UNALIGNED_LOAD32(s2_ptr) ==
73
+ BROTLI_UNALIGNED_LOAD32(s1 + matched)) {
74
+ s2_ptr += 4;
75
+ matched += 4;
76
+ }
77
+ while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
78
+ ++s2_ptr;
79
+ ++matched;
80
+ }
81
+ return matched;
82
+ }
83
+ #endif
84
+
85
+ } // namespace brotli
86
+
87
+ #endif // BROTLI_ENC_FIND_MATCH_LENGTH_H_
@@ -0,0 +1,686 @@
1
+ // Copyright 2010 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // A (forgetful) hash table to the data seen by the compressor, to
16
+ // help create backward references to previous data.
17
+
18
+ #ifndef BROTLI_ENC_HASH_H_
19
+ #define BROTLI_ENC_HASH_H_
20
+
21
+ #include <stddef.h>
22
+ #include <stdint.h>
23
+ #include <string.h>
24
+ #include <sys/types.h>
25
+ #include <algorithm>
26
+ #include <cstdlib>
27
+ #include <memory>
28
+ #include <string>
29
+
30
+ #include "./dictionary_hash.h"
31
+ #include "./fast_log.h"
32
+ #include "./find_match_length.h"
33
+ #include "./port.h"
34
+ #include "./prefix.h"
35
+ #include "./static_dict.h"
36
+ #include "./transform.h"
37
+
38
+ namespace brotli {
39
+
40
+ static const int kDistanceCacheIndex[] = {
41
+ 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
42
+ };
43
+ static const int kDistanceCacheOffset[] = {
44
+ 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
45
+ };
46
+
47
+ static const int kCutoffTransformsCount = 10;
48
+ static const int kCutoffTransforms[] = {0, 12, 27, 23, 42, 63, 56, 48, 59, 64};
49
+
50
+ // kHashMul32 multiplier has these properties:
51
+ // * The multiplier must be odd. Otherwise we may lose the highest bit.
52
+ // * No long streaks of 1s or 0s.
53
+ // * There is no effort to ensure that it is a prime, the oddity is enough
54
+ // for this use.
55
+ // * The number has been tuned heuristically against compression benchmarks.
56
+ static const uint32_t kHashMul32 = 0x1e35a7bd;
57
+
58
+ template<int kShiftBits>
59
+ inline uint32_t Hash(const uint8_t *data) {
60
+ uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
61
+ // The higher bits contain more mixture from the multiplication,
62
+ // so we take our results from there.
63
+ return h >> (32 - kShiftBits);
64
+ }
65
+
66
+ // Usually, we always choose the longest backward reference. This function
67
+ // allows for the exception of that rule.
68
+ //
69
+ // If we choose a backward reference that is further away, it will
70
+ // usually be coded with more bits. We approximate this by assuming
71
+ // log2(distance). If the distance can be expressed in terms of the
72
+ // last four distances, we use some heuristic constants to estimate
73
+ // the bits cost. For the first up to four literals we use the bit
74
+ // cost of the literals from the literal cost model, after that we
75
+ // use the average bit cost of the cost model.
76
+ //
77
+ // This function is used to sometimes discard a longer backward reference
78
+ // when it is not much longer and the bit cost for encoding it is more
79
+ // than the saved literals.
80
+ inline double BackwardReferenceScore(int copy_length,
81
+ int backward_reference_offset) {
82
+ return 5.4 * copy_length - 1.20 * Log2Floor(backward_reference_offset);
83
+ }
84
+
85
// Scores a backward reference whose distance is expressed through one of
// the 16 distance short codes: every copied byte is worth 5.4, minus a
// fixed heuristic cost looked up by distance_short_code (valid range 0..15).
inline double BackwardReferenceScoreUsingLastDistance(int copy_length,
                                                      int distance_short_code) {
  static const double kDistanceShortCodeBitCost[16] = {
    -0.6, 0.95, 1.17, 1.27,
    0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
    1.05, 1.05, 1.15, 1.15, 1.25, 1.25
  };
  const double short_code_cost =
      kDistanceShortCodeBitCost[distance_short_code];
  return 5.4 * copy_length - short_code_cost;
}
94
+
95
// A candidate backward match: its distance plus a packed (length, length
// code) pair. The length lives in the bits above the low five; the low five
// bits hold the length code, or 0 when the code equals the length.
struct BackwardMatch {
  BackwardMatch() : distance(0), length_and_code(0) {}

  // Match whose length code is the same as its length.
  BackwardMatch(int dist, int len)
      : distance(dist), length_and_code(len << 5) {}

  // Match with an explicit length code; the code is stored only when it
  // differs from the length.
  BackwardMatch(int dist, int len, int len_code)
      : distance(dist),
        length_and_code((len << 5) | (len_code != len ? len_code : 0)) {}

  // Length of the match.
  int length() const {
    return length_and_code >> 5;
  }

  // Length code of the match; falls back to length() when no separate code
  // was stored.
  int length_code() const {
    const int stored_code = length_and_code & 31;
    if (stored_code != 0) {
      return stored_code;
    }
    return length();
  }

  int distance;
  int length_and_code;
};
116
+
117
+ // A (forgetful) hash table to the data seen by the compressor, to
118
+ // help create backward references to previous data.
119
+ //
120
+ // This is a hash map of fixed size (kBucketSize). Starting from the
121
+ // given index, kBucketSweep buckets are used to store values of a key.
122
+ template <int kBucketBits, int kBucketSweep, bool kUseDictionary>
123
+ class HashLongestMatchQuickly {
124
+ public:
125
+ HashLongestMatchQuickly() {
126
+ Reset();  // Zero the bucket table and the dictionary counters.
127
+ }
128
+ void Reset() {
129
+ // It is not strictly necessary to fill this buffer here, but
130
+ // not filling will make the results of the compression stochastic
131
+ // (but correct). This is because random data would cause the
132
+ // system to find accidentally good backward references here and there.
133
+ memset(&buckets_[0], 0, sizeof(buckets_));
134
+ num_dict_lookups_ = 0;
135
+ num_dict_matches_ = 0;
136
+ }
137
+ // Look at 5 bytes at data (see HashBytes).
138
+ // Compute a hash from these, and store ix in one of the kBucketSweep
139
+ // slots that start at the hashed bucket index.
140
+ inline void Store(const uint8_t *data, const int ix) {
141
+ const uint32_t key = HashBytes(data);
142
+ // Wiggle the value with the bucket sweep range.
143
+ const uint32_t off = (static_cast<uint32_t>(ix) >> 3) % kBucketSweep;
144
+ buckets_[key + off] = ix;
145
+ }
146
+
147
+ // Store hashes for a range of data.
148
+ void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
149
+ for (int p = 0; p < len; ++p) {
150
+ Store(&data[p & mask], startix + p);
151
+ }
152
+ }
153
+
154
+ // Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
155
+ // up to the length of max_length.
156
+ //
157
+ // Does not look for matches longer than max_length.
158
+ // Does not look for matches further away than max_backward.
159
+ // Writes the best found match length into best_len_out.
160
+ // Writes the backward distance of the best match into best_distance_out
161
+ // (values above max_backward refer to static dictionary words).
162
+ inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer,
163
+ const size_t ring_buffer_mask,
164
+ const int* __restrict distance_cache,
165
+ const uint32_t cur_ix,
166
+ const uint32_t max_length,
167
+ const uint32_t max_backward,
168
+ int * __restrict best_len_out,
169
+ int * __restrict best_len_code_out,
170
+ int * __restrict best_distance_out,
171
+ double* __restrict best_score_out) {
172
+ const int best_len_in = *best_len_out;  // Incoming value: length to beat.
173
+ const int cur_ix_masked = cur_ix & ring_buffer_mask;
174
+ int compare_char = ring_buffer[cur_ix_masked + best_len_in];  // Byte a longer match must share.
175
+ double best_score = *best_score_out;
176
+ int best_len = best_len_in;
177
+ int backward = distance_cache[0];  // First candidate: the distance in distance_cache[0].
178
+ size_t prev_ix = cur_ix - backward;
179
+ bool match_found = false;
180
+ if (prev_ix < cur_ix) {  // Rejects backward == 0 and backward > cur_ix (unsigned wrap-around).
181
+ prev_ix &= ring_buffer_mask;
182
+ if (compare_char == ring_buffer[prev_ix + best_len]) {
183
+ int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
184
+ &ring_buffer[cur_ix_masked],
185
+ max_length);
186
+ if (len >= 4) {
187
+ best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
188
+ best_len = len;
189
+ *best_len_out = len;
190
+ *best_len_code_out = len;
191
+ *best_distance_out = backward;
192
+ *best_score_out = best_score;
193
+ compare_char = ring_buffer[cur_ix_masked + best_len];
194
+ if (kBucketSweep == 1) {  // Single-slot table: accept this match without a hash lookup.
195
+ return true;
196
+ } else {
197
+ match_found = true;
198
+ }
199
+ }
200
+ }
201
+ }
202
+ const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]);
203
+ if (kBucketSweep == 1) {
204
+ // Only one slot to look at, so don't bother to prepare for a loop.
205
+ prev_ix = buckets_[key];
206
+ backward = cur_ix - prev_ix;
207
+ prev_ix &= ring_buffer_mask;
208
+ if (compare_char != ring_buffer[prev_ix + best_len_in]) {
209
+ return false;
210
+ }
211
+ if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
212
+ return false;
213
+ }
214
+ const int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
215
+ &ring_buffer[cur_ix_masked],
216
+ max_length);
217
+ if (len >= 4) {
218
+ *best_len_out = len;
219
+ *best_len_code_out = len;
220
+ *best_distance_out = backward;
221
+ *best_score_out = BackwardReferenceScore(len, backward);
222
+ return true;
223
+ }
224
+ } else {
225
+ uint32_t *bucket = buckets_ + key;
226
+ prev_ix = *bucket++;
227
+ for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {  // The increment pre-loads the next slot; buckets_ has kBucketSweep spare entries for this.
228
+ const int backward = cur_ix - prev_ix;
229
+ prev_ix &= ring_buffer_mask;
230
+ if (compare_char != ring_buffer[prev_ix + best_len]) {
231
+ continue;
232
+ }
233
+ if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
234
+ continue;
235
+ }
236
+ const int len =
237
+ FindMatchLengthWithLimit(&ring_buffer[prev_ix],
238
+ &ring_buffer[cur_ix_masked],
239
+ max_length);
240
+ if (len >= 4) {
241
+ const double score = BackwardReferenceScore(len, backward);
242
+ if (best_score < score) {
243
+ best_score = score;
244
+ best_len = len;
245
+ *best_len_out = best_len;
246
+ *best_len_code_out = best_len;
247
+ *best_distance_out = backward;
248
+ *best_score_out = score;
249
+ compare_char = ring_buffer[cur_ix_masked + best_len];
250
+ match_found = true;
251
+ }
252
+ }
253
+ }
254
+ }
255
+ if (kUseDictionary && !match_found &&
256
+ num_dict_matches_ >= (num_dict_lookups_ >> 7)) {  // Probe the static dictionary only while matches >= lookups / 128.
257
+ ++num_dict_lookups_;
258
+ const uint32_t key = Hash<14>(&ring_buffer[cur_ix_masked]) << 1;
259
+ const uint16_t v = kStaticDictionaryHash[key];
260
+ if (v > 0) {
261
+ const int len = v & 31;  // Low five bits of the hash entry.
262
+ const int dist = v >> 5;  // Remaining upper bits of the hash entry.
263
+ const int offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
264
+ if (len <= max_length) {
265
+ const int matchlen =
266
+ FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
267
+ &kBrotliDictionary[offset], len);
268
+ if (matchlen > len - kCutoffTransformsCount && matchlen > 0) {
269
+ const int transform_id = kCutoffTransforms[len - matchlen];  // Indexed by the number of unmatched trailing bytes.
270
+ const int word_id =
271
+ transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
272
+ dist;
273
+ const size_t backward = max_backward + word_id + 1;  // Dictionary references use distances beyond max_backward.
274
+ const double score = BackwardReferenceScore(matchlen, backward);
275
+ if (best_score < score) {
276
+ ++num_dict_matches_;
277
+ best_score = score;
278
+ best_len = matchlen;
279
+ *best_len_out = best_len;
280
+ *best_len_code_out = len;
281
+ *best_distance_out = backward;
282
+ *best_score_out = best_score;
283
+ return true;
284
+ }
285
+ }
286
+ }
287
+ }
288
+ }
289
+ return match_found;
290
+ }
291
+
292
+ enum { kHashLength = 5 };
293
+ enum { kHashTypeLength = 8 };
294
+ // HashBytes is the function that chooses the bucket to place
295
+ // the address in. The HashLongestMatch and HashLongestMatchQuickly
296
+ // classes have separate, different implementations of hashing.
297
+ static uint32_t HashBytes(const uint8_t *data) {
298
+ // Computing a hash based on 5 bytes works much better for
299
+ // qualities 1 and 3, where the next hash value is likely to replace
300
+ static const uint32_t kHashMul32 = 0x1e35a7bd;
301
+ uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32;
302
+ // The higher bits contain more mixture from the multiplication,
303
+ // so we take our results from there.
304
+ return h >> (64 - kBucketBits);
305
+ }
306
+
307
+ private:
308
+ static const uint32_t kBucketSize = 1 << kBucketBits;
309
+ uint32_t buckets_[kBucketSize + kBucketSweep];
310
+ size_t num_dict_lookups_;
311
+ size_t num_dict_matches_;
312
+ };
313
+
314
+ // The maximum length for which the zopflification uses distinct distances.
315
+ static const int kMaxZopfliLen = 325;
316
+
317
+ // A (forgetful) hash table to the data seen by the compressor, to
318
+ // help create backward references to previous data.
319
+ //
320
+ // This is a hash map of fixed size (kBucketSize) to a ring buffer of
321
+ // fixed size (kBlockSize). The ring buffer contains the last kBlockSize
322
+ // index positions of the given hash key in the compressed data.
323
+ template <int kBucketBits,
324
+ int kBlockBits,
325
+ int kNumLastDistancesToCheck>
326
+ class HashLongestMatch {
327
+ public:
328
+ HashLongestMatch() {
329
+ Reset();  // Zero the per-key counters and the dictionary statistics.
330
+ }
331
+
332
+ void Reset() {
333
+ memset(&num_[0], 0, sizeof(num_));
334
+ num_dict_lookups_ = 0;
335
+ num_dict_matches_ = 0;
336
+ }
337
+
338
+ // Look at 3 bytes at data.
339
+ // Compute a hash from these, and store the value of ix at that position.
340
+ inline void Store(const uint8_t *data, const int ix) {
341
+ const uint32_t key = HashBytes(data);
342
+ const int minor_ix = num_[key] & kBlockMask;
343
+ buckets_[key][minor_ix] = ix;
344
+ ++num_[key];
345
+ }
346
+
347
+ // Store hashes for a range of data.
348
+ void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
349
+ for (int p = 0; p < len; ++p) {
350
+ Store(&data[p & mask], startix + p);
351
+ }
352
+ }
353
+
354
+ // Find a longest backward match of &data[cur_ix] up to the length of
355
+ // max_length.
356
+ //
357
+ // Does not look for matches longer than max_length.
358
+ // Does not look for matches further away than max_backward.
359
+ // Writes the best found match length into best_len_out.
360
+ // Writes the index (&data[index]) offset from the start of the best match
361
+ // into best_distance_out.
362
+ // Write the score of the best match into best_score_out.
363
+ bool FindLongestMatch(const uint8_t * __restrict data,
364
+ const size_t ring_buffer_mask,
365
+ const int* __restrict distance_cache,
366
+ const uint32_t cur_ix,
367
+ uint32_t max_length,
368
+ const uint32_t max_backward,
369
+ int * __restrict best_len_out,
370
+ int * __restrict best_len_code_out,
371
+ int * __restrict best_distance_out,
372
+ double * __restrict best_score_out) {
373
+ *best_len_code_out = 0;
374
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
375
+ bool match_found = false;
376
+ // Don't accept a short copy from far away.
377
+ double best_score = *best_score_out;
378
+ int best_len = *best_len_out;
379
+ *best_len_out = 0;
380
+ // Try last distance first.
381
+ for (int i = 0; i < kNumLastDistancesToCheck; ++i) {
382
+ const int idx = kDistanceCacheIndex[i];
383
+ const int backward = distance_cache[idx] + kDistanceCacheOffset[i];
384
+ size_t prev_ix = cur_ix - backward;
385
+ if (prev_ix >= cur_ix) {
386
+ continue;
387
+ }
388
+ if (PREDICT_FALSE(backward > max_backward)) {
389
+ continue;
390
+ }
391
+ prev_ix &= ring_buffer_mask;
392
+
393
+ if (cur_ix_masked + best_len > ring_buffer_mask ||
394
+ prev_ix + best_len > ring_buffer_mask ||
395
+ data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
396
+ continue;
397
+ }
398
+ const size_t len =
399
+ FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
400
+ max_length);
401
+ if (len >= 3 || (len == 2 && i < 2)) {
402
+ // Comparing for >= 2 does not change the semantics, but just saves for
403
+ // a few unnecessary binary logarithms in backward reference score,
404
+ // since we are not interested in such short matches.
405
+ double score = BackwardReferenceScoreUsingLastDistance(len, i);
406
+ if (best_score < score) {
407
+ best_score = score;
408
+ best_len = len;
409
+ *best_len_out = best_len;
410
+ *best_len_code_out = best_len;
411
+ *best_distance_out = backward;
412
+ *best_score_out = best_score;
413
+ match_found = true;
414
+ }
415
+ }
416
+ }
417
+ const uint32_t key = HashBytes(&data[cur_ix_masked]);
418
+ const int * __restrict const bucket = &buckets_[key][0];
419
+ const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
420
+ for (int i = num_[key] - 1; i >= down; --i) {
421
+ int prev_ix = bucket[i & kBlockMask];
422
+ if (prev_ix >= 0) {
423
+ const size_t backward = cur_ix - prev_ix;
424
+ if (PREDICT_FALSE(backward > max_backward)) {
425
+ break;
426
+ }
427
+ prev_ix &= ring_buffer_mask;
428
+ if (cur_ix_masked + best_len > ring_buffer_mask ||
429
+ prev_ix + best_len > ring_buffer_mask ||
430
+ data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
431
+ continue;
432
+ }
433
+ const size_t len =
434
+ FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
435
+ max_length);
436
+ if (len >= 4) {
437
+ // Comparing for >= 3 does not change the semantics, but just saves
438
+ // for a few unnecessary binary logarithms in backward reference
439
+ // score, since we are not interested in such short matches.
440
+ double score = BackwardReferenceScore(len, backward);
441
+ if (best_score < score) {
442
+ best_score = score;
443
+ best_len = len;
444
+ *best_len_out = best_len;
445
+ *best_len_code_out = best_len;
446
+ *best_distance_out = backward;
447
+ *best_score_out = best_score;
448
+ match_found = true;
449
+ }
450
+ }
451
+ }
452
+ }
453
+ if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
454
+ uint32_t key = Hash<14>(&data[cur_ix_masked]) << 1;
455
+ for (int k = 0; k < 2; ++k, ++key) {
456
+ ++num_dict_lookups_;
457
+ const uint16_t v = kStaticDictionaryHash[key];
458
+ if (v > 0) {
459
+ const int len = v & 31;
460
+ const int dist = v >> 5;
461
+ const int offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
462
+ if (len <= max_length) {
463
+ const int matchlen =
464
+ FindMatchLengthWithLimit(&data[cur_ix_masked],
465
+ &kBrotliDictionary[offset], len);
466
+ if (matchlen > len - kCutoffTransformsCount && matchlen > 0) {
467
+ const int transform_id = kCutoffTransforms[len - matchlen];
468
+ const int word_id =
469
+ transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
470
+ dist;
471
+ const size_t backward = max_backward + word_id + 1;
472
+ double score = BackwardReferenceScore(matchlen, backward);
473
+ if (best_score < score) {
474
+ ++num_dict_matches_;
475
+ best_score = score;
476
+ best_len = matchlen;
477
+ *best_len_out = best_len;
478
+ *best_len_code_out = len;
479
+ *best_distance_out = backward;
480
+ *best_score_out = best_score;
481
+ match_found = true;
482
+ }
483
+ }
484
+ }
485
+ }
486
+ }
487
+ }
488
+ return match_found;
489
+ }
490
+
491
+ // Similar to FindLongestMatch(), but finds all matches.
492
+ //
493
+ // Sets *num_matches to the number of matches found, and stores the found
494
+ // matches in matches[0] to matches[*num_matches - 1].
495
+ //
496
+ // If the longest match is longer than kMaxZopfliLen, returns only this
497
+ // longest match.
498
+ //
499
+ // Requires that at least kMaxZopfliLen space is available in matches.
500
+ void FindAllMatches(const uint8_t* data,
501
+ const size_t ring_buffer_mask,
502
+ const uint32_t cur_ix,
503
+ uint32_t max_length,
504
+ const uint32_t max_backward,
505
+ int* num_matches,
506
+ BackwardMatch* matches) const {
507
+ BackwardMatch* const orig_matches = matches;
508
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
509
+ int best_len = 1;
510
+ int stop = static_cast<int>(cur_ix) - 64;
511
+ if (stop < 0) { stop = 0; }
512
+ for (int i = cur_ix - 1; i > stop && best_len <= 2; --i) {
513
+ size_t prev_ix = i;
514
+ const size_t backward = cur_ix - prev_ix;
515
+ if (PREDICT_FALSE(backward > max_backward)) {
516
+ break;
517
+ }
518
+ prev_ix &= ring_buffer_mask;
519
+ if (data[cur_ix_masked] != data[prev_ix] ||
520
+ data[cur_ix_masked + 1] != data[prev_ix + 1]) {
521
+ continue;
522
+ }
523
+ const size_t len =
524
+ FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
525
+ max_length);
526
+ if (len > best_len) {
527
+ best_len = len;
528
+ if (len > kMaxZopfliLen) {
529
+ matches = orig_matches;
530
+ }
531
+ *matches++ = BackwardMatch(backward, len);
532
+ }
533
+ }
534
+ const uint32_t key = HashBytes(&data[cur_ix_masked]);
535
+ const int * __restrict const bucket = &buckets_[key][0];
536
+ const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
537
+ for (int i = num_[key] - 1; i >= down; --i) {
538
+ int prev_ix = bucket[i & kBlockMask];
539
+ if (prev_ix >= 0) {
540
+ const size_t backward = cur_ix - prev_ix;
541
+ if (PREDICT_FALSE(backward > max_backward)) {
542
+ break;
543
+ }
544
+ prev_ix &= ring_buffer_mask;
545
+ if (cur_ix_masked + best_len > ring_buffer_mask ||
546
+ prev_ix + best_len > ring_buffer_mask ||
547
+ data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
548
+ continue;
549
+ }
550
+ const size_t len =
551
+ FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
552
+ max_length);
553
+ if (len > best_len) {
554
+ best_len = len;
555
+ if (len > kMaxZopfliLen) {
556
+ matches = orig_matches;
557
+ }
558
+ *matches++ = BackwardMatch(backward, len);
559
+ }
560
+ }
561
+ }
562
+ std::vector<int> dict_matches(kMaxDictionaryMatchLen + 1, kInvalidMatch);
563
+ int minlen = std::max<int>(4, best_len + 1);
564
+ if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
565
+ &dict_matches[0])) {
566
+ int maxlen = std::min<int>(kMaxDictionaryMatchLen, max_length);
567
+ for (int l = minlen; l <= maxlen; ++l) {
568
+ int dict_id = dict_matches[l];
569
+ if (dict_id < kInvalidMatch) {
570
+ *matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
571
+ dict_id & 31);
572
+ }
573
+ }
574
+ }
575
+ *num_matches += matches - orig_matches;
576
+ }
577
+
578
+ enum { kHashLength = 4 };  // NOTE(review): presumably the number of input bytes one hash covers (HashBytes() does a 32-bit load) -- confirm.
579
+ enum { kHashTypeLength = 4 };  // Hashers::WarmupHash() only stores positions that still have at least this many bytes left.
580
+
581
+ // HashBytes is the function that chooses the bucket to place
582
+ // the address in. The HashLongestMatch and HashLongestMatchQuickly
583
+ // classes have separate, different implementations of hashing.
584
+ static uint32_t HashBytes(const uint8_t *data) {
585
+ // kHashMul32 multiplier has these properties:
586
+ // * The multiplier must be odd. Otherwise we may lose the highest bit.
587
+ // * No long streaks of 1s or 0s.
588
+ // * Is not unfortunate (see the unittest) for the English language.
589
+ // * There is no effort to ensure that it is a prime, the oddity is enough
590
+ // for this use.
591
+ // * The number has been tuned heuristically against compression benchmarks.
592
+ static const uint32_t kHashMul32 = 0x1e35a7bd;
593
+ uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
594
+ // The higher bits contain more mixture from the multiplication,
595
+ // so we take our results from there.
596
+ return h >> (32 - kBucketBits);
597
+ }
598
+
599
+ private:
600
+ // Number of hash buckets (one per possible HashBytes() value).
601
+ static const uint32_t kBucketSize = 1 << kBucketBits;
602
+
603
+ // Only kBlockSize newest backward references are kept,
604
+ // and the older are forgotten.
605
+ static const uint32_t kBlockSize = 1 << kBlockBits;
606
+
607
+ // Mask for accessing entries in a block (in a ringbuffer manner).
608
+ static const uint32_t kBlockMask = (1 << kBlockBits) - 1;
609
+
610
+ // Number of entries stored into each bucket since the last Reset(), kept in a uint16_t; Store() uses this count masked with kBlockMask to pick the slot.
611
+ uint16_t num_[kBucketSize];
612
+
613
+ // Buckets containing kBlockSize of backward references (the ix values passed to Store()).
614
+ int buckets_[kBucketSize][kBlockSize];
615
+
616
+ size_t num_dict_lookups_;  // Static-dictionary hash probes made by FindLongestMatch().
617
+ size_t num_dict_matches_;  // Probes whose match became the new best in FindLongestMatch().
618
+ };
619
+
620
+ struct Hashers {
621
+ // For kBucketSweep == 1, enabling the dictionary lookup makes compression
621
+ // a little faster (0.5% - 1%) and it compresses 0.15% better on small text
622
+ // and html inputs.
623
+ typedef HashLongestMatchQuickly<16, 1, true> H1;  // NOTE(review): the third argument presumably toggles the dictionary lookup mentioned above -- confirm.
624
+ typedef HashLongestMatchQuickly<16, 2, false> H2;
625
+ typedef HashLongestMatchQuickly<16, 4, false> H3;
626
+ typedef HashLongestMatchQuickly<17, 4, true> H4;
627
+ typedef HashLongestMatch<14, 4, 4> H5;  // HashLongestMatch arguments: kBucketBits, kBlockBits, kNumLastDistancesToCheck.
628
+ typedef HashLongestMatch<14, 5, 4> H6;
629
+ typedef HashLongestMatch<15, 6, 10> H7;
630
+ typedef HashLongestMatch<15, 7, 10> H8;
631
+ typedef HashLongestMatch<15, 8, 16> H9;
633
+
634
+ void Init(int type) {
635
+ switch (type) {
636
+ case 1: hash_h1.reset(new H1); break;
637
+ case 2: hash_h2.reset(new H2); break;
638
+ case 3: hash_h3.reset(new H3); break;
639
+ case 4: hash_h4.reset(new H4); break;
640
+ case 5: hash_h5.reset(new H5); break;
641
+ case 6: hash_h6.reset(new H6); break;
642
+ case 7: hash_h7.reset(new H7); break;
643
+ case 8: hash_h8.reset(new H8); break;
644
+ case 9: hash_h9.reset(new H9); break;
645
+ default: break;
646
+ }
647
+ }
648
+
649
// Pre-populates hasher with the positions of a custom dictionary.
//
// For every offset i that still has at least Hasher::kHashTypeLength bytes
// left in dict[0 .. size), the bytes starting at dict[i] are hashed and i is
// stored as their position. Nothing is stored when size is smaller than
// Hasher::kHashTypeLength.
//
//   size    number of bytes available at dict.
//   dict    the dictionary bytes.
//   hasher  the hasher to fill; it is dereferenced once per stored position.
template<typename Hasher>
void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {
  for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) {
    // Hash the bytes AT position i. Passing plain dict here would file every
    // position under the hash of the dictionary's first bytes.
    hasher->Store(&dict[i], static_cast<int>(i));
  }
}
655
+
656
+ // Custom LZ77 window.
657
+ void PrependCustomDictionary(
658
+ int type, const size_t size, const uint8_t* dict) {
659
+ switch (type) {
660
+ case 1: WarmupHash(size, dict, hash_h1.get()); break;
661
+ case 2: WarmupHash(size, dict, hash_h2.get()); break;
662
+ case 3: WarmupHash(size, dict, hash_h3.get()); break;
663
+ case 4: WarmupHash(size, dict, hash_h4.get()); break;
664
+ case 5: WarmupHash(size, dict, hash_h5.get()); break;
665
+ case 6: WarmupHash(size, dict, hash_h6.get()); break;
666
+ case 7: WarmupHash(size, dict, hash_h7.get()); break;
667
+ case 8: WarmupHash(size, dict, hash_h8.get()); break;
668
+ case 9: WarmupHash(size, dict, hash_h9.get()); break;
669
+ default: break;
670
+ }
671
+ }
672
+
673
+ std::unique_ptr<H1> hash_h1;  // One owner per hasher type; Init() allocates only the one matching its type argument.
673
+ std::unique_ptr<H2> hash_h2;
674
+ std::unique_ptr<H3> hash_h3;
675
+ std::unique_ptr<H4> hash_h4;
676
+ std::unique_ptr<H5> hash_h5;
677
+ std::unique_ptr<H6> hash_h6;
678
+ std::unique_ptr<H7> hash_h7;
679
+ std::unique_ptr<H8> hash_h8;
680
+ std::unique_ptr<H9> hash_h9;
682
+ };
683
+
684
+ } // namespace brotli
685
+
686
+ #endif // BROTLI_ENC_HASH_H_