brotli 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.travis.yml +11 -3
  4. data/Gemfile +2 -0
  5. data/ext/brotli/brotli.c +279 -0
  6. data/ext/brotli/brotli.h +2 -0
  7. data/ext/brotli/buffer.c +95 -0
  8. data/ext/brotli/buffer.h +19 -0
  9. data/ext/brotli/extconf.rb +21 -81
  10. data/lib/brotli/version.rb +1 -1
  11. data/vendor/brotli/dec/bit_reader.c +5 -5
  12. data/vendor/brotli/dec/bit_reader.h +15 -15
  13. data/vendor/brotli/dec/context.h +1 -1
  14. data/vendor/brotli/dec/decode.c +433 -348
  15. data/vendor/brotli/dec/decode.h +74 -48
  16. data/vendor/brotli/dec/huffman.c +5 -4
  17. data/vendor/brotli/dec/huffman.h +4 -4
  18. data/vendor/brotli/dec/port.h +2 -95
  19. data/vendor/brotli/dec/prefix.h +5 -3
  20. data/vendor/brotli/dec/state.c +15 -27
  21. data/vendor/brotli/dec/state.h +21 -17
  22. data/vendor/brotli/dec/transform.h +1 -1
  23. data/vendor/brotli/enc/backward_references.c +892 -0
  24. data/vendor/brotli/enc/backward_references.h +85 -102
  25. data/vendor/brotli/enc/backward_references_inc.h +147 -0
  26. data/vendor/brotli/enc/bit_cost.c +35 -0
  27. data/vendor/brotli/enc/bit_cost.h +23 -121
  28. data/vendor/brotli/enc/bit_cost_inc.h +127 -0
  29. data/vendor/brotli/enc/block_encoder_inc.h +33 -0
  30. data/vendor/brotli/enc/block_splitter.c +197 -0
  31. data/vendor/brotli/enc/block_splitter.h +40 -50
  32. data/vendor/brotli/enc/block_splitter_inc.h +432 -0
  33. data/vendor/brotli/enc/brotli_bit_stream.c +1334 -0
  34. data/vendor/brotli/enc/brotli_bit_stream.h +95 -167
  35. data/vendor/brotli/enc/cluster.c +56 -0
  36. data/vendor/brotli/enc/cluster.h +23 -305
  37. data/vendor/brotli/enc/cluster_inc.h +315 -0
  38. data/vendor/brotli/enc/command.h +83 -76
  39. data/vendor/brotli/enc/compress_fragment.c +747 -0
  40. data/vendor/brotli/enc/compress_fragment.h +48 -37
  41. data/vendor/brotli/enc/compress_fragment_two_pass.c +557 -0
  42. data/vendor/brotli/enc/compress_fragment_two_pass.h +37 -26
  43. data/vendor/brotli/enc/compressor.cc +139 -0
  44. data/vendor/brotli/enc/compressor.h +146 -0
  45. data/vendor/brotli/enc/context.h +102 -96
  46. data/vendor/brotli/enc/dictionary_hash.h +9 -5
  47. data/vendor/brotli/enc/encode.c +1562 -0
  48. data/vendor/brotli/enc/encode.h +211 -199
  49. data/vendor/brotli/enc/encode_parallel.cc +161 -151
  50. data/vendor/brotli/enc/encode_parallel.h +7 -8
  51. data/vendor/brotli/enc/entropy_encode.c +501 -0
  52. data/vendor/brotli/enc/entropy_encode.h +107 -89
  53. data/vendor/brotli/enc/entropy_encode_static.h +29 -62
  54. data/vendor/brotli/enc/fast_log.h +26 -20
  55. data/vendor/brotli/enc/find_match_length.h +23 -20
  56. data/vendor/brotli/enc/hash.h +614 -871
  57. data/vendor/brotli/enc/hash_forgetful_chain_inc.h +249 -0
  58. data/vendor/brotli/enc/hash_longest_match_inc.h +241 -0
  59. data/vendor/brotli/enc/hash_longest_match_quickly_inc.h +230 -0
  60. data/vendor/brotli/enc/histogram.c +95 -0
  61. data/vendor/brotli/enc/histogram.h +49 -83
  62. data/vendor/brotli/enc/histogram_inc.h +51 -0
  63. data/vendor/brotli/enc/literal_cost.c +178 -0
  64. data/vendor/brotli/enc/literal_cost.h +16 -10
  65. data/vendor/brotli/enc/memory.c +181 -0
  66. data/vendor/brotli/enc/memory.h +62 -0
  67. data/vendor/brotli/enc/metablock.c +515 -0
  68. data/vendor/brotli/enc/metablock.h +87 -57
  69. data/vendor/brotli/enc/metablock_inc.h +183 -0
  70. data/vendor/brotli/enc/port.h +73 -47
  71. data/vendor/brotli/enc/prefix.h +34 -61
  72. data/vendor/brotli/enc/quality.h +130 -0
  73. data/vendor/brotli/enc/ringbuffer.h +137 -122
  74. data/vendor/brotli/enc/{static_dict.cc → static_dict.c} +162 -139
  75. data/vendor/brotli/enc/static_dict.h +23 -18
  76. data/vendor/brotli/enc/static_dict_lut.h +11223 -12037
  77. data/vendor/brotli/enc/streams.cc +7 -7
  78. data/vendor/brotli/enc/streams.h +32 -32
  79. data/vendor/brotli/enc/{utf8_util.cc → utf8_util.c} +22 -20
  80. data/vendor/brotli/enc/utf8_util.h +16 -9
  81. data/vendor/brotli/enc/write_bits.h +49 -43
  82. metadata +34 -25
  83. data/ext/brotli/brotli.cc +0 -181
  84. data/vendor/brotli/dec/Makefile +0 -12
  85. data/vendor/brotli/dec/dictionary.c +0 -9466
  86. data/vendor/brotli/dec/dictionary.h +0 -38
  87. data/vendor/brotli/dec/types.h +0 -38
  88. data/vendor/brotli/enc/Makefile +0 -14
  89. data/vendor/brotli/enc/backward_references.cc +0 -858
  90. data/vendor/brotli/enc/block_splitter.cc +0 -505
  91. data/vendor/brotli/enc/brotli_bit_stream.cc +0 -1181
  92. data/vendor/brotli/enc/compress_fragment.cc +0 -701
  93. data/vendor/brotli/enc/compress_fragment_two_pass.cc +0 -524
  94. data/vendor/brotli/enc/dictionary.cc +0 -9466
  95. data/vendor/brotli/enc/dictionary.h +0 -41
  96. data/vendor/brotli/enc/encode.cc +0 -1180
  97. data/vendor/brotli/enc/entropy_encode.cc +0 -480
  98. data/vendor/brotli/enc/histogram.cc +0 -67
  99. data/vendor/brotli/enc/literal_cost.cc +0 -165
  100. data/vendor/brotli/enc/metablock.cc +0 -539
  101. data/vendor/brotli/enc/transform.h +0 -248
  102. data/vendor/brotli/enc/types.h +0 -29
@@ -4,44 +4,55 @@
4
4
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
5
  */
6
6
 
7
- // Function for fast encoding of an input fragment, independently from the input
8
- // history. This function uses one-pass processing: when we find a backward
9
- // match, we immediately emit the corresponding command and literal codes to
10
- // the bit stream.
7
+ /* Function for fast encoding of an input fragment, independently from the input
8
+ history. This function uses one-pass processing: when we find a backward
9
+ match, we immediately emit the corresponding command and literal codes to
10
+ the bit stream. */
11
11
 
12
12
  #ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_
13
13
  #define BROTLI_ENC_COMPRESS_FRAGMENT_H_
14
14
 
15
- #include "./types.h"
16
-
17
- namespace brotli {
18
-
19
- // Compresses "input" string to the "*storage" buffer as one or more complete
20
- // meta-blocks, and updates the "*storage_ix" bit position.
21
- //
22
- // If "is_last" is true, emits an additional empty last meta-block.
23
- //
24
- // "cmd_depth" and "cmd_bits" contain the command and distance prefix codes
25
- // (see comment in encode.h) used for the encoding of this input fragment.
26
- // If "is_last" is false, they are updated to reflect the statistics
27
- // of this input fragment, to be used for the encoding of the next fragment.
28
- //
29
- // "*cmd_code_numbits" is the number of bits of the compressed representation
30
- // of the command and distance prefix codes, and "cmd_code" is an array of
31
- // at least "(*cmd_code_numbits + 7) >> 3" size that contains the compressed
32
- // command and distance prefix codes. If "is_last" is false, these are also
33
- // updated to represent the updated "cmd_depth" and "cmd_bits".
34
- //
35
- // REQUIRES: "input_size" is greater than zero, or "is_last" is true.
36
- // REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
37
- // REQUIRES: "table_size" is a power of two
38
- void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
39
- bool is_last,
40
- int* table, size_t table_size,
41
- uint8_t cmd_depth[128], uint16_t cmd_bits[128],
42
- size_t* cmd_code_numbits, uint8_t* cmd_code,
43
- size_t* storage_ix, uint8_t* storage);
44
-
45
- } // namespace brotli
46
-
47
- #endif // BROTLI_ENC_COMPRESS_FRAGMENT_H_
15
+ #include "../common/types.h"
16
+ #include "./memory.h"
17
+ #include "./port.h"
18
+
19
+ #if defined(__cplusplus) || defined(c_plusplus)
20
+ extern "C" {
21
+ #endif
22
+
23
+ /* Compresses "input" string to the "*storage" buffer as one or more complete
24
+ meta-blocks, and updates the "*storage_ix" bit position.
25
+
26
+ If "is_last" is 1, emits an additional empty last meta-block.
27
+
28
+ "cmd_depth" and "cmd_bits" contain the command and distance prefix codes
29
+ (see comment in encode.h) used for the encoding of this input fragment.
30
+ If "is_last" is 0, they are updated to reflect the statistics
31
+ of this input fragment, to be used for the encoding of the next fragment.
32
+
33
+ "*cmd_code_numbits" is the number of bits of the compressed representation
34
+ of the command and distance prefix codes, and "cmd_code" is an array of
35
+ at least "(*cmd_code_numbits + 7) >> 3" size that contains the compressed
36
+ command and distance prefix codes. If "is_last" is 0, these are also
37
+ updated to represent the updated "cmd_depth" and "cmd_bits".
38
+
39
+ REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
40
+ REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
41
+ REQUIRES: "table_size" is a power of two */
42
+ BROTLI_INTERNAL void BrotliCompressFragmentFast(MemoryManager* m,
43
+ const uint8_t* input,
44
+ size_t input_size,
45
+ BROTLI_BOOL is_last,
46
+ int* table, size_t table_size,
47
+ uint8_t cmd_depth[128],
48
+ uint16_t cmd_bits[128],
49
+ size_t* cmd_code_numbits,
50
+ uint8_t* cmd_code,
51
+ size_t* storage_ix,
52
+ uint8_t* storage);
53
+
54
+ #if defined(__cplusplus) || defined(c_plusplus)
55
+ } /* extern "C" */
56
+ #endif
57
+
58
+ #endif /* BROTLI_ENC_COMPRESS_FRAGMENT_H_ */
@@ -0,0 +1,557 @@
1
+ /* Copyright 2015 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Function for fast encoding of an input fragment, independently from the input
8
+ history. This function uses two-pass processing: in the first pass we save
9
+ the found backward matches and literal bytes into a buffer, and in the
10
+ second pass we emit them into the bit stream using prefix codes built based
11
+ on the actual command and literal byte histograms. */
12
+
13
+ #include "./compress_fragment_two_pass.h"
14
+
15
+ #include <string.h> /* memcmp, memcpy, memset */
16
+
17
+ #include "../common/types.h"
18
+ #include "./bit_cost.h"
19
+ #include "./brotli_bit_stream.h"
20
+ #include "./entropy_encode.h"
21
+ #include "./fast_log.h"
22
+ #include "./find_match_length.h"
23
+ #include "./memory.h"
24
+ #include "./port.h"
25
+ #include "./write_bits.h"
26
+
27
+
28
+ #if defined(__cplusplus) || defined(c_plusplus)
29
+ extern "C" {
30
+ #endif
31
+
32
+ /* kHashMul32 multiplier has these properties:
33
+ * The multiplier must be odd. Otherwise we may lose the highest bit.
34
+ * No long streaks of 1s or 0s.
35
+ * There is no effort to ensure that it is a prime, the oddity is enough
36
+ for this use.
37
+ * The number has been tuned heuristically against compression benchmarks. */
38
+ static const uint32_t kHashMul32 = 0x1e35a7bd;
39
+
40
+ static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
41
+ const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 16) * kHashMul32;
42
+ return (uint32_t)(h >> shift);
43
+ }
44
+
45
+ static BROTLI_INLINE uint32_t HashBytesAtOffset(
46
+ uint64_t v, int offset, size_t shift) {
47
+ assert(offset >= 0);
48
+ assert(offset <= 2);
49
+ {
50
+ const uint64_t h = ((v >> (8 * offset)) << 16) * kHashMul32;
51
+ return (uint32_t)(h >> shift);
52
+ }
53
+ }
54
+
55
+ static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
56
+ return TO_BROTLI_BOOL(
57
+ BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
58
+ p1[4] == p2[4] &&
59
+ p1[5] == p2[5]);
60
+ }
61
+
62
+ /* Builds a command and distance prefix code (each 64 symbols) into "depth" and
63
+ "bits" based on "histogram" and stores it into the bit stream. */
64
+ static void BuildAndStoreCommandPrefixCode(
65
+ const uint32_t histogram[128],
66
+ uint8_t depth[128], uint16_t bits[128],
67
+ size_t* storage_ix, uint8_t* storage) {
68
+ /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
69
+ HuffmanTree tree[129];
70
+ uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
71
+ uint16_t cmd_bits[64];
72
+ BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
73
+ BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
74
+ /* We have to jump through a few hoops here in order to compute
75
+ the command bits because the symbols are in a different order than in
76
+ the full alphabet. This looks complicated, but having the symbols
77
+ in this order in the command bits saves a few branches in the Emit*
78
+ functions. */
79
+ memcpy(cmd_depth, depth + 24, 24);
80
+ memcpy(cmd_depth + 24, depth, 8);
81
+ memcpy(cmd_depth + 32, depth + 48, 8);
82
+ memcpy(cmd_depth + 40, depth + 8, 8);
83
+ memcpy(cmd_depth + 48, depth + 56, 8);
84
+ memcpy(cmd_depth + 56, depth + 16, 8);
85
+ BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
86
+ memcpy(bits, cmd_bits + 24, 16);
87
+ memcpy(bits + 8, cmd_bits + 40, 16);
88
+ memcpy(bits + 16, cmd_bits + 56, 16);
89
+ memcpy(bits + 24, cmd_bits, 48);
90
+ memcpy(bits + 48, cmd_bits + 32, 16);
91
+ memcpy(bits + 56, cmd_bits + 48, 16);
92
+ BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
93
+ {
94
+ /* Create the bit length array for the full command alphabet. */
95
+ size_t i;
96
+ memset(cmd_depth, 0, 64); /* only 64 first values were used */
97
+ memcpy(cmd_depth, depth + 24, 8);
98
+ memcpy(cmd_depth + 64, depth + 32, 8);
99
+ memcpy(cmd_depth + 128, depth + 40, 8);
100
+ memcpy(cmd_depth + 192, depth + 48, 8);
101
+ memcpy(cmd_depth + 384, depth + 56, 8);
102
+ for (i = 0; i < 8; ++i) {
103
+ cmd_depth[128 + 8 * i] = depth[i];
104
+ cmd_depth[256 + 8 * i] = depth[8 + i];
105
+ cmd_depth[448 + 8 * i] = depth[16 + i];
106
+ }
107
+ BrotliStoreHuffmanTree(
108
+ cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
109
+ }
110
+ BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
111
+ }
112
+
113
+ static BROTLI_INLINE void EmitInsertLen(
114
+ uint32_t insertlen, uint32_t** commands) {
115
+ if (insertlen < 6) {
116
+ **commands = insertlen;
117
+ } else if (insertlen < 130) {
118
+ const uint32_t tail = insertlen - 2;
119
+ const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
120
+ const uint32_t prefix = tail >> nbits;
121
+ const uint32_t inscode = (nbits << 1) + prefix + 2;
122
+ const uint32_t extra = tail - (prefix << nbits);
123
+ **commands = inscode | (extra << 8);
124
+ } else if (insertlen < 2114) {
125
+ const uint32_t tail = insertlen - 66;
126
+ const uint32_t nbits = Log2FloorNonZero(tail);
127
+ const uint32_t code = nbits + 10;
128
+ const uint32_t extra = tail - (1u << nbits);
129
+ **commands = code | (extra << 8);
130
+ } else if (insertlen < 6210) {
131
+ const uint32_t extra = insertlen - 2114;
132
+ **commands = 21 | (extra << 8);
133
+ } else if (insertlen < 22594) {
134
+ const uint32_t extra = insertlen - 6210;
135
+ **commands = 22 | (extra << 8);
136
+ } else {
137
+ const uint32_t extra = insertlen - 22594;
138
+ **commands = 23 | (extra << 8);
139
+ }
140
+ ++(*commands);
141
+ }
142
+
143
+ static BROTLI_INLINE void EmitCopyLen(size_t copylen, uint32_t** commands) {
144
+ if (copylen < 10) {
145
+ **commands = (uint32_t)(copylen + 38);
146
+ } else if (copylen < 134) {
147
+ const size_t tail = copylen - 6;
148
+ const size_t nbits = Log2FloorNonZero(tail) - 1;
149
+ const size_t prefix = tail >> nbits;
150
+ const size_t code = (nbits << 1) + prefix + 44;
151
+ const size_t extra = tail - (prefix << nbits);
152
+ **commands = (uint32_t)(code | (extra << 8));
153
+ } else if (copylen < 2118) {
154
+ const size_t tail = copylen - 70;
155
+ const size_t nbits = Log2FloorNonZero(tail);
156
+ const size_t code = nbits + 52;
157
+ const size_t extra = tail - ((size_t)1 << nbits);
158
+ **commands = (uint32_t)(code | (extra << 8));
159
+ } else {
160
+ const size_t extra = copylen - 2118;
161
+ **commands = (uint32_t)(63 | (extra << 8));
162
+ }
163
+ ++(*commands);
164
+ }
165
+
166
+ static BROTLI_INLINE void EmitCopyLenLastDistance(
167
+ size_t copylen, uint32_t** commands) {
168
+ if (copylen < 12) {
169
+ **commands = (uint32_t)(copylen + 20);
170
+ ++(*commands);
171
+ } else if (copylen < 72) {
172
+ const size_t tail = copylen - 8;
173
+ const size_t nbits = Log2FloorNonZero(tail) - 1;
174
+ const size_t prefix = tail >> nbits;
175
+ const size_t code = (nbits << 1) + prefix + 28;
176
+ const size_t extra = tail - (prefix << nbits);
177
+ **commands = (uint32_t)(code | (extra << 8));
178
+ ++(*commands);
179
+ } else if (copylen < 136) {
180
+ const size_t tail = copylen - 8;
181
+ const size_t code = (tail >> 5) + 54;
182
+ const size_t extra = tail & 31;
183
+ **commands = (uint32_t)(code | (extra << 8));
184
+ ++(*commands);
185
+ **commands = 64;
186
+ ++(*commands);
187
+ } else if (copylen < 2120) {
188
+ const size_t tail = copylen - 72;
189
+ const size_t nbits = Log2FloorNonZero(tail);
190
+ const size_t code = nbits + 52;
191
+ const size_t extra = tail - ((size_t)1 << nbits);
192
+ **commands = (uint32_t)(code | (extra << 8));
193
+ ++(*commands);
194
+ **commands = 64;
195
+ ++(*commands);
196
+ } else {
197
+ const size_t extra = copylen - 2120;
198
+ **commands = (uint32_t)(63 | (extra << 8));
199
+ ++(*commands);
200
+ **commands = 64;
201
+ ++(*commands);
202
+ }
203
+ }
204
+
205
+ static BROTLI_INLINE void EmitDistance(uint32_t distance, uint32_t** commands) {
206
+ uint32_t d = distance + 3;
207
+ uint32_t nbits = Log2FloorNonZero(d) - 1;
208
+ const uint32_t prefix = (d >> nbits) & 1;
209
+ const uint32_t offset = (2 + prefix) << nbits;
210
+ const uint32_t distcode = 2 * (nbits - 1) + prefix + 80;
211
+ uint32_t extra = d - offset;
212
+ **commands = distcode | (extra << 8);
213
+ ++(*commands);
214
+ }
215
+
216
+ /* REQUIRES: len <= 1 << 20. */
217
+ static void BrotliStoreMetaBlockHeader(
218
+ size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
219
+ uint8_t* storage) {
220
+ /* ISLAST */
221
+ BrotliWriteBits(1, 0, storage_ix, storage);
222
+ if (len <= (1U << 16)) {
223
+ /* MNIBBLES is 4 */
224
+ BrotliWriteBits(2, 0, storage_ix, storage);
225
+ BrotliWriteBits(16, len - 1, storage_ix, storage);
226
+ } else {
227
+ /* MNIBBLES is 5 */
228
+ BrotliWriteBits(2, 1, storage_ix, storage);
229
+ BrotliWriteBits(20, len - 1, storage_ix, storage);
230
+ }
231
+ /* ISUNCOMPRESSED */
232
+ BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
233
+ }
234
+
235
+ static void CreateCommands(const uint8_t* input, size_t block_size,
236
+ size_t input_size, const uint8_t* base_ip, int* table, size_t table_size,
237
+ uint8_t** literals, uint32_t** commands) {
238
+ /* "ip" is the input pointer. */
239
+ const uint8_t* ip = input;
240
+ const size_t shift = 64u - Log2FloorNonZero(table_size);
241
+ const uint8_t* ip_end = input + block_size;
242
+ /* "next_emit" is a pointer to the first byte that is not covered by a
243
+ previous copy. Bytes between "next_emit" and the start of the next copy or
244
+ the end of the input will be emitted as literal bytes. */
245
+ const uint8_t* next_emit = input;
246
+
247
+ int last_distance = -1;
248
+ const size_t kInputMarginBytes = 16;
249
+ const size_t kMinMatchLen = 6;
250
+
251
+ assert(table_size);
252
+ assert(table_size <= (1u << 31));
253
+ /* table must be power of two */
254
+ assert((table_size & (table_size - 1)) == 0);
255
+ assert(table_size - 1 ==
256
+ (size_t)(MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
257
+
258
+ if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
259
+ /* For the last block, we need to keep a 16 bytes margin so that we can be
260
+ sure that all distances are at most window size - 16.
261
+ For all other blocks, we only need to keep a margin of 5 bytes so that
262
+ we don't go over the block size with a copy. */
263
+ const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
264
+ input_size - kInputMarginBytes);
265
+ const uint8_t* ip_limit = input + len_limit;
266
+
267
+ uint32_t next_hash;
268
+ for (next_hash = Hash(++ip, shift); ; ) {
269
+ /* Step 1: Scan forward in the input looking for a 6-byte-long match.
270
+ If we get close to exhausting the input then goto emit_remainder.
271
+
272
+ Heuristic match skipping: If 32 bytes are scanned with no matches
273
+ found, start looking only at every other byte. If 32 more bytes are
274
+ scanned, look at every third byte, etc.. When a match is found,
275
+ immediately go back to looking at every byte. This is a small loss
276
+ (~5% performance, ~0.1% density) for compressible data due to more
277
+ bookkeeping, but for non-compressible data (such as JPEG) it's a huge
278
+ win since the compressor quickly "realizes" the data is incompressible
279
+ and doesn't bother looking for matches everywhere.
280
+
281
+ The "skip" variable keeps track of how many bytes there are since the
282
+ last match; dividing it by 32 (i.e. right-shifting by five) gives the
283
+ number of bytes to move ahead for each iteration. */
284
+ uint32_t skip = 32;
285
+
286
+ const uint8_t* next_ip = ip;
287
+ const uint8_t* candidate;
288
+
289
+ assert(next_emit < ip);
290
+
291
+ do {
292
+ uint32_t hash = next_hash;
293
+ uint32_t bytes_between_hash_lookups = skip++ >> 5;
294
+ ip = next_ip;
295
+ assert(hash == Hash(ip, shift));
296
+ next_ip = ip + bytes_between_hash_lookups;
297
+ if (PREDICT_FALSE(next_ip > ip_limit)) {
298
+ goto emit_remainder;
299
+ }
300
+ next_hash = Hash(next_ip, shift);
301
+ candidate = ip - last_distance;
302
+ if (IsMatch(ip, candidate)) {
303
+ if (PREDICT_TRUE(candidate < ip)) {
304
+ table[hash] = (int)(ip - base_ip);
305
+ break;
306
+ }
307
+ }
308
+ candidate = base_ip + table[hash];
309
+ assert(candidate >= base_ip);
310
+ assert(candidate < ip);
311
+
312
+ table[hash] = (int)(ip - base_ip);
313
+ } while (PREDICT_TRUE(!IsMatch(ip, candidate)));
314
+
315
+ /* Step 2: Emit the found match together with the literal bytes from
316
+ "next_emit", and then see if we can find a next match immediately
317
+ afterwards. Repeat until we find no match for the input
318
+ without emitting some literal bytes. */
319
+
320
+ {
321
+ /* We have a 6-byte match at ip, and we need to emit bytes in
322
+ [next_emit, ip). */
323
+ const uint8_t* base = ip;
324
+ size_t matched = 6 + FindMatchLengthWithLimit(
325
+ candidate + 6, ip + 6, (size_t)(ip_end - ip) - 6);
326
+ int distance = (int)(base - candidate); /* > 0 */
327
+ int insert = (int)(base - next_emit);
328
+ ip += matched;
329
+ assert(0 == memcmp(base, candidate, matched));
330
+ EmitInsertLen((uint32_t)insert, commands);
331
+ memcpy(*literals, next_emit, (size_t)insert);
332
+ *literals += insert;
333
+ if (distance == last_distance) {
334
+ **commands = 64;
335
+ ++(*commands);
336
+ } else {
337
+ EmitDistance((uint32_t)distance, commands);
338
+ last_distance = distance;
339
+ }
340
+ EmitCopyLenLastDistance(matched, commands);
341
+
342
+ next_emit = ip;
343
+ if (PREDICT_FALSE(ip >= ip_limit)) {
344
+ goto emit_remainder;
345
+ }
346
+ {
347
+ /* We could immediately start working at ip now, but to improve
348
+ compression we first update "table" with the hashes of some
349
+ positions within the last copy. */
350
+ uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
351
+ uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
352
+ uint32_t cur_hash;
353
+ table[prev_hash] = (int)(ip - base_ip - 5);
354
+ prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
355
+ table[prev_hash] = (int)(ip - base_ip - 4);
356
+ prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
357
+ table[prev_hash] = (int)(ip - base_ip - 3);
358
+ input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
359
+ cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
360
+ prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
361
+ table[prev_hash] = (int)(ip - base_ip - 2);
362
+ prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
363
+ table[prev_hash] = (int)(ip - base_ip - 1);
364
+
365
+ candidate = base_ip + table[cur_hash];
366
+ table[cur_hash] = (int)(ip - base_ip);
367
+ }
368
+ }
369
+
370
+ while (IsMatch(ip, candidate)) {
371
+ /* We have a 6-byte match at ip, and no need to emit any
372
+ literal bytes prior to ip. */
373
+ const uint8_t* base = ip;
374
+ size_t matched = 6 + FindMatchLengthWithLimit(
375
+ candidate + 6, ip + 6, (size_t)(ip_end - ip) - 6);
376
+ ip += matched;
377
+ last_distance = (int)(base - candidate); /* > 0 */
378
+ assert(0 == memcmp(base, candidate, matched));
379
+ EmitCopyLen(matched, commands);
380
+ EmitDistance((uint32_t)last_distance, commands);
381
+
382
+ next_emit = ip;
383
+ if (PREDICT_FALSE(ip >= ip_limit)) {
384
+ goto emit_remainder;
385
+ }
386
+ {
387
+ /* We could immediately start working at ip now, but to improve
388
+ compression we first update "table" with the hashes of some
389
+ positions within the last copy. */
390
+ uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
391
+ uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
392
+ uint32_t cur_hash;
393
+ table[prev_hash] = (int)(ip - base_ip - 5);
394
+ prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
395
+ table[prev_hash] = (int)(ip - base_ip - 4);
396
+ prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
397
+ table[prev_hash] = (int)(ip - base_ip - 3);
398
+ input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
399
+ cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
400
+ prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
401
+ table[prev_hash] = (int)(ip - base_ip - 2);
402
+ prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
403
+ table[prev_hash] = (int)(ip - base_ip - 1);
404
+
405
+ candidate = base_ip + table[cur_hash];
406
+ table[cur_hash] = (int)(ip - base_ip);
407
+ }
408
+ }
409
+
410
+ next_hash = Hash(++ip, shift);
411
+ }
412
+ }
413
+
414
+ emit_remainder:
415
+ assert(next_emit <= ip_end);
416
+ /* Emit the remaining bytes as literals. */
417
+ if (next_emit < ip_end) {
418
+ const uint32_t insert = (uint32_t)(ip_end - next_emit);
419
+ EmitInsertLen(insert, commands);
420
+ memcpy(*literals, next_emit, insert);
421
+ *literals += insert;
422
+ }
423
+ }
424
+
425
+ static void StoreCommands(MemoryManager* m,
426
+ const uint8_t* literals, const size_t num_literals,
427
+ const uint32_t* commands, const size_t num_commands,
428
+ size_t* storage_ix, uint8_t* storage) {
429
+ static const uint32_t kNumExtraBits[128] = {
430
+ 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24,
431
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
432
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24,
433
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
434
+ 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
435
+ 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
436
+ 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24,
437
+ };
438
+ static const uint32_t kInsertOffset[24] = {
439
+ 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578,
440
+ 1090, 2114, 6210, 22594,
441
+ };
442
+
443
+ uint8_t lit_depths[256];
444
+ uint16_t lit_bits[256];
445
+ uint32_t lit_histo[256] = { 0 };
446
+ uint8_t cmd_depths[128] = { 0 };
447
+ uint16_t cmd_bits[128] = { 0 };
448
+ uint32_t cmd_histo[128] = { 0 };
449
+ size_t i;
450
+ for (i = 0; i < num_literals; ++i) {
451
+ ++lit_histo[literals[i]];
452
+ }
453
+ BrotliBuildAndStoreHuffmanTreeFast(m, lit_histo, num_literals,
454
+ /* max_bits = */ 8,
455
+ lit_depths, lit_bits,
456
+ storage_ix, storage);
457
+ if (BROTLI_IS_OOM(m)) return;
458
+
459
+ for (i = 0; i < num_commands; ++i) {
460
+ ++cmd_histo[commands[i] & 0xff];
461
+ }
462
+ cmd_histo[1] += 1;
463
+ cmd_histo[2] += 1;
464
+ cmd_histo[64] += 1;
465
+ cmd_histo[84] += 1;
466
+ BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depths, cmd_bits,
467
+ storage_ix, storage);
468
+
469
+ for (i = 0; i < num_commands; ++i) {
470
+ const uint32_t cmd = commands[i];
471
+ const uint32_t code = cmd & 0xff;
472
+ const uint32_t extra = cmd >> 8;
473
+ BrotliWriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
474
+ BrotliWriteBits(kNumExtraBits[code], extra, storage_ix, storage);
475
+ if (code < 24) {
476
+ const uint32_t insert = kInsertOffset[code] + extra;
477
+ uint32_t j;
478
+ for (j = 0; j < insert; ++j) {
479
+ const uint8_t lit = *literals;
480
+ BrotliWriteBits(lit_depths[lit], lit_bits[lit], storage_ix, storage);
481
+ ++literals;
482
+ }
483
+ }
484
+ }
485
+ }
486
+
487
+ /* Acceptable loss for uncompressible speedup is 2% */
488
+ #define MIN_RATIO 0.98
489
+ #define SAMPLE_RATE 43
490
+
491
+ static BROTLI_BOOL ShouldCompress(
492
+ const uint8_t* input, size_t input_size, size_t num_literals) {
493
+ double corpus_size = (double)input_size;
494
+ if (num_literals < MIN_RATIO * corpus_size) {
495
+ return BROTLI_TRUE;
496
+ } else {
497
+ uint32_t literal_histo[256] = { 0 };
498
+ const double max_total_bit_cost = corpus_size * 8 * MIN_RATIO / SAMPLE_RATE;
499
+ size_t i;
500
+ for (i = 0; i < input_size; i += SAMPLE_RATE) {
501
+ ++literal_histo[input[i]];
502
+ }
503
+ return TO_BROTLI_BOOL(BitsEntropy(literal_histo, 256) < max_total_bit_cost);
504
+ }
505
+ }
506
+
507
+ void BrotliCompressFragmentTwoPass(MemoryManager* m,
508
+ const uint8_t* input, size_t input_size,
509
+ BROTLI_BOOL is_last,
510
+ uint32_t* command_buf, uint8_t* literal_buf,
511
+ int* table, size_t table_size,
512
+ size_t* storage_ix, uint8_t* storage) {
513
+ /* Save the start of the first block for position and distance computations.
514
+ */
515
+ const uint8_t* base_ip = input;
516
+
517
+ while (input_size > 0) {
518
+ size_t block_size =
519
+ BROTLI_MIN(size_t, input_size, kCompressFragmentTwoPassBlockSize);
520
+ uint32_t* commands = command_buf;
521
+ uint8_t* literals = literal_buf;
522
+ size_t num_literals;
523
+ CreateCommands(input, block_size, input_size, base_ip, table, table_size,
524
+ &literals, &commands);
525
+ num_literals = (size_t)(literals - literal_buf);
526
+ if (ShouldCompress(input, block_size, num_literals)) {
527
+ const size_t num_commands = (size_t)(commands - command_buf);
528
+ BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
529
+ /* No block splits, no contexts. */
530
+ BrotliWriteBits(13, 0, storage_ix, storage);
531
+ StoreCommands(m, literal_buf, num_literals, command_buf, num_commands,
532
+ storage_ix, storage);
533
+ if (BROTLI_IS_OOM(m)) return;
534
+ } else {
535
+ /* Since we did not find many backward references and the entropy of
536
+ the data is close to 8 bits, we can simply emit an uncompressed block.
537
+ This makes compression speed of uncompressible data about 3x faster. */
538
+ BrotliStoreMetaBlockHeader(block_size, 1, storage_ix, storage);
539
+ *storage_ix = (*storage_ix + 7u) & ~7u;
540
+ memcpy(&storage[*storage_ix >> 3], input, block_size);
541
+ *storage_ix += block_size << 3;
542
+ storage[*storage_ix >> 3] = 0;
543
+ }
544
+ input += block_size;
545
+ input_size -= block_size;
546
+ }
547
+
548
+ if (is_last) {
549
+ BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
550
+ BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
551
+ *storage_ix = (*storage_ix + 7u) & ~7u;
552
+ }
553
+ }
554
+
555
+ #if defined(__cplusplus) || defined(c_plusplus)
556
+ } /* extern "C" */
557
+ #endif