extbrotli 0.0.1.PROTOTYPE

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +28 -0
  3. data/README.md +67 -0
  4. data/Rakefile +158 -0
  5. data/contrib/brotli/LICENSE +202 -0
  6. data/contrib/brotli/README.md +18 -0
  7. data/contrib/brotli/dec/bit_reader.c +55 -0
  8. data/contrib/brotli/dec/bit_reader.h +256 -0
  9. data/contrib/brotli/dec/context.h +260 -0
  10. data/contrib/brotli/dec/decode.c +1573 -0
  11. data/contrib/brotli/dec/decode.h +160 -0
  12. data/contrib/brotli/dec/dictionary.h +9494 -0
  13. data/contrib/brotli/dec/huffman.c +325 -0
  14. data/contrib/brotli/dec/huffman.h +77 -0
  15. data/contrib/brotli/dec/port.h +148 -0
  16. data/contrib/brotli/dec/prefix.h +756 -0
  17. data/contrib/brotli/dec/state.c +149 -0
  18. data/contrib/brotli/dec/state.h +185 -0
  19. data/contrib/brotli/dec/streams.c +99 -0
  20. data/contrib/brotli/dec/streams.h +100 -0
  21. data/contrib/brotli/dec/transform.h +315 -0
  22. data/contrib/brotli/dec/types.h +36 -0
  23. data/contrib/brotli/enc/backward_references.cc +769 -0
  24. data/contrib/brotli/enc/backward_references.h +50 -0
  25. data/contrib/brotli/enc/bit_cost.h +147 -0
  26. data/contrib/brotli/enc/block_splitter.cc +418 -0
  27. data/contrib/brotli/enc/block_splitter.h +78 -0
  28. data/contrib/brotli/enc/brotli_bit_stream.cc +884 -0
  29. data/contrib/brotli/enc/brotli_bit_stream.h +149 -0
  30. data/contrib/brotli/enc/cluster.h +290 -0
  31. data/contrib/brotli/enc/command.h +140 -0
  32. data/contrib/brotli/enc/context.h +185 -0
  33. data/contrib/brotli/enc/dictionary.h +9485 -0
  34. data/contrib/brotli/enc/dictionary_hash.h +4125 -0
  35. data/contrib/brotli/enc/encode.cc +715 -0
  36. data/contrib/brotli/enc/encode.h +196 -0
  37. data/contrib/brotli/enc/encode_parallel.cc +354 -0
  38. data/contrib/brotli/enc/encode_parallel.h +37 -0
  39. data/contrib/brotli/enc/entropy_encode.cc +492 -0
  40. data/contrib/brotli/enc/entropy_encode.h +88 -0
  41. data/contrib/brotli/enc/fast_log.h +179 -0
  42. data/contrib/brotli/enc/find_match_length.h +87 -0
  43. data/contrib/brotli/enc/hash.h +686 -0
  44. data/contrib/brotli/enc/histogram.cc +76 -0
  45. data/contrib/brotli/enc/histogram.h +100 -0
  46. data/contrib/brotli/enc/literal_cost.cc +172 -0
  47. data/contrib/brotli/enc/literal_cost.h +38 -0
  48. data/contrib/brotli/enc/metablock.cc +544 -0
  49. data/contrib/brotli/enc/metablock.h +88 -0
  50. data/contrib/brotli/enc/port.h +151 -0
  51. data/contrib/brotli/enc/prefix.h +85 -0
  52. data/contrib/brotli/enc/ringbuffer.h +108 -0
  53. data/contrib/brotli/enc/static_dict.cc +441 -0
  54. data/contrib/brotli/enc/static_dict.h +40 -0
  55. data/contrib/brotli/enc/static_dict_lut.h +12063 -0
  56. data/contrib/brotli/enc/streams.cc +127 -0
  57. data/contrib/brotli/enc/streams.h +129 -0
  58. data/contrib/brotli/enc/transform.h +250 -0
  59. data/contrib/brotli/enc/write_bits.h +91 -0
  60. data/ext/extbrotli.cc +24 -0
  61. data/ext/extbrotli.h +73 -0
  62. data/ext/extconf.rb +35 -0
  63. data/ext/lldecoder.c +220 -0
  64. data/ext/llencoder.cc +433 -0
  65. data/gemstub.rb +21 -0
  66. data/lib/extbrotli.rb +243 -0
  67. data/lib/extbrotli/version.rb +3 -0
  68. metadata +140 -0
@@ -0,0 +1,315 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ */
15
+
16
+ /* Transformations on dictionary words. */
17
+
18
+ #ifndef BROTLI_DEC_TRANSFORM_H_
19
+ #define BROTLI_DEC_TRANSFORM_H_
20
+
21
+ #include <stdio.h>
22
+ #include <ctype.h>
23
+ #include "./port.h"
24
+ #include "./types.h"
25
+
26
+ #if defined(__cplusplus) || defined(c_plusplus)
27
+ extern "C" {
28
+ #endif
29
+
30
+ enum WordTransformType {
31
+ kIdentity = 0,
32
+ kOmitLast1 = 1,
33
+ kOmitLast2 = 2,
34
+ kOmitLast3 = 3,
35
+ kOmitLast4 = 4,
36
+ kOmitLast5 = 5,
37
+ kOmitLast6 = 6,
38
+ kOmitLast7 = 7,
39
+ kOmitLast8 = 8,
40
+ kOmitLast9 = 9,
41
+ kUppercaseFirst = 10,
42
+ kUppercaseAll = 11,
43
+ kOmitFirst1 = 12,
44
+ kOmitFirst2 = 13,
45
+ kOmitFirst3 = 14,
46
+ kOmitFirst4 = 15,
47
+ kOmitFirst5 = 16,
48
+ kOmitFirst6 = 17,
49
+ kOmitFirst7 = 18,
50
+ kOmitFirst8 = 19,
51
+ kOmitFirst9 = 20
52
+ };
53
+
54
+ typedef struct {
55
+ const uint8_t prefix_id;
56
+ const uint8_t transform;
57
+ const uint8_t suffix_id;
58
+ } Transform;
59
+
60
+ static const char kPrefixSuffix[208] =
61
+ "\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0"
62
+ " for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0"
63
+ " is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. This \0"
64
+ " not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous ";
65
+
66
+ enum {
67
+ /* EMPTY = ""
68
+ SP = " "
69
+ DQUOT = "\""
70
+ SQUOT = "'"
71
+ CLOSEBR = "]"
72
+ OPEN = "("
73
+ SLASH = "/"
74
+ NBSP = non-breaking space "\0xc2\xa0"
75
+ */
76
+ kPFix_EMPTY = 0,
77
+ kPFix_SP = 1,
78
+ kPFix_COMMASP = 3,
79
+ kPFix_SPofSPtheSP = 6,
80
+ kPFix_SPtheSP = 9,
81
+ kPFix_eSP = 12,
82
+ kPFix_SPofSP = 15,
83
+ kPFix_sSP = 20,
84
+ kPFix_DOT = 23,
85
+ kPFix_SPandSP = 25,
86
+ kPFix_SPinSP = 31,
87
+ kPFix_DQUOT = 36,
88
+ kPFix_SPtoSP = 38,
89
+ kPFix_DQUOTGT = 43,
90
+ kPFix_NEWLINE = 46,
91
+ kPFix_DOTSP = 48,
92
+ kPFix_CLOSEBR = 51,
93
+ kPFix_SPforSP = 53,
94
+ kPFix_SPaSP = 59,
95
+ kPFix_SPthatSP = 63,
96
+ kPFix_SQUOT = 70,
97
+ kPFix_SPwithSP = 72,
98
+ kPFix_SPfromSP = 79,
99
+ kPFix_SPbySP = 86,
100
+ kPFix_OPEN = 91,
101
+ kPFix_DOTSPTheSP = 93,
102
+ kPFix_SPonSP = 100,
103
+ kPFix_SPasSP = 105,
104
+ kPFix_SPisSP = 110,
105
+ kPFix_ingSP = 115,
106
+ kPFix_NEWLINETAB = 120,
107
+ kPFix_COLON = 123,
108
+ kPFix_edSP = 125,
109
+ kPFix_EQDQUOT = 129,
110
+ kPFix_SPatSP = 132,
111
+ kPFix_lySP = 137,
112
+ kPFix_COMMA = 141,
113
+ kPFix_EQSQUOT = 143,
114
+ kPFix_DOTcomSLASH = 146,
115
+ kPFix_DOTSPThisSP = 152,
116
+ kPFix_SPnotSP = 160,
117
+ kPFix_erSP = 166,
118
+ kPFix_alSP = 170,
119
+ kPFix_fulSP = 174,
120
+ kPFix_iveSP = 179,
121
+ kPFix_lessSP = 184,
122
+ kPFix_estSP = 190,
123
+ kPFix_izeSP = 195,
124
+ kPFix_NBSP = 200,
125
+ kPFix_ousSP = 203
126
+ };
127
+
128
+
129
+ static const Transform kTransforms[] = {
130
+ { kPFix_EMPTY, kIdentity, kPFix_EMPTY },
131
+ { kPFix_EMPTY, kIdentity, kPFix_SP },
132
+ { kPFix_SP, kIdentity, kPFix_SP },
133
+ { kPFix_EMPTY, kOmitFirst1, kPFix_EMPTY },
134
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_SP },
135
+ { kPFix_EMPTY, kIdentity, kPFix_SPtheSP },
136
+ { kPFix_SP, kIdentity, kPFix_EMPTY },
137
+ { kPFix_sSP, kIdentity, kPFix_SP },
138
+ { kPFix_EMPTY, kIdentity, kPFix_SPofSP },
139
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_EMPTY },
140
+ { kPFix_EMPTY, kIdentity, kPFix_SPandSP },
141
+ { kPFix_EMPTY, kOmitFirst2, kPFix_EMPTY },
142
+ { kPFix_EMPTY, kOmitLast1, kPFix_EMPTY },
143
+ { kPFix_COMMASP, kIdentity, kPFix_SP },
144
+ { kPFix_EMPTY, kIdentity, kPFix_COMMASP },
145
+ { kPFix_SP, kUppercaseFirst, kPFix_SP },
146
+ { kPFix_EMPTY, kIdentity, kPFix_SPinSP },
147
+ { kPFix_EMPTY, kIdentity, kPFix_SPtoSP },
148
+ { kPFix_eSP, kIdentity, kPFix_SP },
149
+ { kPFix_EMPTY, kIdentity, kPFix_DQUOT },
150
+ { kPFix_EMPTY, kIdentity, kPFix_DOT },
151
+ { kPFix_EMPTY, kIdentity, kPFix_DQUOTGT },
152
+ { kPFix_EMPTY, kIdentity, kPFix_NEWLINE },
153
+ { kPFix_EMPTY, kOmitLast3, kPFix_EMPTY },
154
+ { kPFix_EMPTY, kIdentity, kPFix_CLOSEBR },
155
+ { kPFix_EMPTY, kIdentity, kPFix_SPforSP },
156
+ { kPFix_EMPTY, kOmitFirst3, kPFix_EMPTY },
157
+ { kPFix_EMPTY, kOmitLast2, kPFix_EMPTY },
158
+ { kPFix_EMPTY, kIdentity, kPFix_SPaSP },
159
+ { kPFix_EMPTY, kIdentity, kPFix_SPthatSP },
160
+ { kPFix_SP, kUppercaseFirst, kPFix_EMPTY },
161
+ { kPFix_EMPTY, kIdentity, kPFix_DOTSP },
162
+ { kPFix_DOT, kIdentity, kPFix_EMPTY },
163
+ { kPFix_SP, kIdentity, kPFix_COMMASP },
164
+ { kPFix_EMPTY, kOmitFirst4, kPFix_EMPTY },
165
+ { kPFix_EMPTY, kIdentity, kPFix_SPwithSP },
166
+ { kPFix_EMPTY, kIdentity, kPFix_SQUOT },
167
+ { kPFix_EMPTY, kIdentity, kPFix_SPfromSP },
168
+ { kPFix_EMPTY, kIdentity, kPFix_SPbySP },
169
+ { kPFix_EMPTY, kOmitFirst5, kPFix_EMPTY },
170
+ { kPFix_EMPTY, kOmitFirst6, kPFix_EMPTY },
171
+ { kPFix_SPtheSP, kIdentity, kPFix_EMPTY },
172
+ { kPFix_EMPTY, kOmitLast4, kPFix_EMPTY },
173
+ { kPFix_EMPTY, kIdentity, kPFix_DOTSPTheSP },
174
+ { kPFix_EMPTY, kUppercaseAll, kPFix_EMPTY },
175
+ { kPFix_EMPTY, kIdentity, kPFix_SPonSP },
176
+ { kPFix_EMPTY, kIdentity, kPFix_SPasSP },
177
+ { kPFix_EMPTY, kIdentity, kPFix_SPisSP },
178
+ { kPFix_EMPTY, kOmitLast7, kPFix_EMPTY },
179
+ { kPFix_EMPTY, kOmitLast1, kPFix_ingSP },
180
+ { kPFix_EMPTY, kIdentity, kPFix_NEWLINETAB },
181
+ { kPFix_EMPTY, kIdentity, kPFix_COLON },
182
+ { kPFix_SP, kIdentity, kPFix_DOTSP },
183
+ { kPFix_EMPTY, kIdentity, kPFix_edSP },
184
+ { kPFix_EMPTY, kOmitFirst9, kPFix_EMPTY },
185
+ { kPFix_EMPTY, kOmitFirst7, kPFix_EMPTY },
186
+ { kPFix_EMPTY, kOmitLast6, kPFix_EMPTY },
187
+ { kPFix_EMPTY, kIdentity, kPFix_OPEN },
188
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMASP },
189
+ { kPFix_EMPTY, kOmitLast8, kPFix_EMPTY },
190
+ { kPFix_EMPTY, kIdentity, kPFix_SPatSP },
191
+ { kPFix_EMPTY, kIdentity, kPFix_lySP },
192
+ { kPFix_SPtheSP, kIdentity, kPFix_SPofSP },
193
+ { kPFix_EMPTY, kOmitLast5, kPFix_EMPTY },
194
+ { kPFix_EMPTY, kOmitLast9, kPFix_EMPTY },
195
+ { kPFix_SP, kUppercaseFirst, kPFix_COMMASP },
196
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOT },
197
+ { kPFix_DOT, kIdentity, kPFix_OPEN },
198
+ { kPFix_EMPTY, kUppercaseAll, kPFix_SP },
199
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOTGT },
200
+ { kPFix_EMPTY, kIdentity, kPFix_EQDQUOT },
201
+ { kPFix_SP, kIdentity, kPFix_DOT },
202
+ { kPFix_DOTcomSLASH, kIdentity, kPFix_EMPTY },
203
+ { kPFix_SPtheSP, kIdentity, kPFix_SPofSPtheSP },
204
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_SQUOT },
205
+ { kPFix_EMPTY, kIdentity, kPFix_DOTSPThisSP },
206
+ { kPFix_EMPTY, kIdentity, kPFix_COMMA },
207
+ { kPFix_DOT, kIdentity, kPFix_SP },
208
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_OPEN },
209
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_DOT },
210
+ { kPFix_EMPTY, kIdentity, kPFix_SPnotSP },
211
+ { kPFix_SP, kIdentity, kPFix_EQDQUOT },
212
+ { kPFix_EMPTY, kIdentity, kPFix_erSP },
213
+ { kPFix_SP, kUppercaseAll, kPFix_SP },
214
+ { kPFix_EMPTY, kIdentity, kPFix_alSP },
215
+ { kPFix_SP, kUppercaseAll, kPFix_EMPTY },
216
+ { kPFix_EMPTY, kIdentity, kPFix_EQSQUOT },
217
+ { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOT },
218
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_DOTSP },
219
+ { kPFix_SP, kIdentity, kPFix_OPEN },
220
+ { kPFix_EMPTY, kIdentity, kPFix_fulSP },
221
+ { kPFix_SP, kUppercaseFirst, kPFix_DOTSP },
222
+ { kPFix_EMPTY, kIdentity, kPFix_iveSP },
223
+ { kPFix_EMPTY, kIdentity, kPFix_lessSP },
224
+ { kPFix_EMPTY, kUppercaseAll, kPFix_SQUOT },
225
+ { kPFix_EMPTY, kIdentity, kPFix_estSP },
226
+ { kPFix_SP, kUppercaseFirst, kPFix_DOT },
227
+ { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOTGT },
228
+ { kPFix_SP, kIdentity, kPFix_EQSQUOT },
229
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMA },
230
+ { kPFix_EMPTY, kIdentity, kPFix_izeSP },
231
+ { kPFix_EMPTY, kUppercaseAll, kPFix_DOT },
232
+ { kPFix_NBSP, kIdentity, kPFix_EMPTY },
233
+ { kPFix_SP, kIdentity, kPFix_COMMA },
234
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_EQDQUOT },
235
+ { kPFix_EMPTY, kUppercaseAll, kPFix_EQDQUOT },
236
+ { kPFix_EMPTY, kIdentity, kPFix_ousSP },
237
+ { kPFix_EMPTY, kUppercaseAll, kPFix_COMMASP },
238
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_EQSQUOT },
239
+ { kPFix_SP, kUppercaseFirst, kPFix_COMMA },
240
+ { kPFix_SP, kUppercaseAll, kPFix_EQDQUOT },
241
+ { kPFix_SP, kUppercaseAll, kPFix_COMMASP },
242
+ { kPFix_EMPTY, kUppercaseAll, kPFix_COMMA },
243
+ { kPFix_EMPTY, kUppercaseAll, kPFix_OPEN },
244
+ { kPFix_EMPTY, kUppercaseAll, kPFix_DOTSP },
245
+ { kPFix_SP, kUppercaseAll, kPFix_DOT },
246
+ { kPFix_EMPTY, kUppercaseAll, kPFix_EQSQUOT },
247
+ { kPFix_SP, kUppercaseAll, kPFix_DOTSP },
248
+ { kPFix_SP, kUppercaseFirst, kPFix_EQDQUOT },
249
+ { kPFix_SP, kUppercaseAll, kPFix_EQSQUOT },
250
+ { kPFix_SP, kUppercaseFirst, kPFix_EQSQUOT },
251
+ };
252
+
253
+ static const int kNumTransforms = sizeof(kTransforms) / sizeof(kTransforms[0]);
254
+
255
+ static int ToUpperCase(uint8_t *p) {
256
+ if (p[0] < 0xc0) {
257
+ if (p[0] >= 'a' && p[0] <= 'z') {
258
+ p[0] ^= 32;
259
+ }
260
+ return 1;
261
+ }
262
+ /* An overly simplified uppercasing model for utf-8. */
263
+ if (p[0] < 0xe0) {
264
+ p[1] ^= 32;
265
+ return 2;
266
+ }
267
+ /* An arbitrary transform for three byte characters. */
268
+ p[2] ^= 5;
269
+ return 3;
270
+ }
271
+
272
+ static BROTLI_NOINLINE int TransformDictionaryWord(
273
+ uint8_t* dst, const uint8_t* word, int len, int transform) {
274
+ int idx = 0;
275
+ {
276
+ const char* prefix = &kPrefixSuffix[kTransforms[transform].prefix_id];
277
+ while (*prefix) { dst[idx++] = (uint8_t)*prefix++; }
278
+ }
279
+ {
280
+ const int t = kTransforms[transform].transform;
281
+ int skip = t < kOmitFirst1 ? 0 : t - (kOmitFirst1 - 1);
282
+ int i = 0;
283
+ uint8_t* uppercase;
284
+ if (skip > len) {
285
+ skip = len;
286
+ }
287
+ word += skip;
288
+ len -= skip;
289
+ if (t <= kOmitLast9) {
290
+ len -= t;
291
+ }
292
+ while (i < len) { dst[idx++] = word[i++]; }
293
+ uppercase = &dst[idx - len];
294
+ if (t == kUppercaseFirst) {
295
+ ToUpperCase(uppercase);
296
+ } else if (t == kUppercaseAll) {
297
+ while (len > 0) {
298
+ int step = ToUpperCase(uppercase);
299
+ uppercase += step;
300
+ len -= step;
301
+ }
302
+ }
303
+ }
304
+ {
305
+ const char* suffix = &kPrefixSuffix[kTransforms[transform].suffix_id];
306
+ while (*suffix) { dst[idx++] = (uint8_t)*suffix++; }
307
+ return idx;
308
+ }
309
+ }
310
+
311
+ #if defined(__cplusplus) || defined(c_plusplus)
312
+ } /* extern "C" */
313
+ #endif
314
+
315
+ #endif /* BROTLI_DEC_TRANSFORM_H_ */
@@ -0,0 +1,36 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ */
15
+
16
+ /* Common types */
17
+
18
+ #ifndef BROTLI_DEC_TYPES_H_
19
+ #define BROTLI_DEC_TYPES_H_
20
+
21
+ #include <stddef.h> /* for size_t */
22
+
23
+ #if defined(_MSC_VER) && (_MSC_VER < 1600)
24
+ typedef signed char int8_t;
25
+ typedef unsigned char uint8_t;
26
+ typedef signed short int16_t;
27
+ typedef unsigned short uint16_t;
28
+ typedef signed int int32_t;
29
+ typedef unsigned int uint32_t;
30
+ typedef unsigned long long int uint64_t;
31
+ typedef long long int int64_t;
32
+ #else
33
+ #include <stdint.h>
34
+ #endif /* defined(_MSC_VER) && (_MSC_VER < 1600) */
35
+
36
+ #endif /* BROTLI_DEC_TYPES_H_ */
@@ -0,0 +1,769 @@
1
+ // Copyright 2013 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Function to find backward reference copies.
16
+
17
+ #include "./backward_references.h"
18
+
19
+ #include <algorithm>
20
+ #include <limits>
21
+ #include <vector>
22
+
23
+ #include "./command.h"
24
+ #include "./fast_log.h"
25
+
26
+ namespace brotli {
27
+
28
+ static const double kInfinity = std::numeric_limits<double>::infinity();
29
+
30
+ // Histogram based cost model for zopflification.
31
+ class ZopfliCostModel {
32
+ public:
33
+ void SetFromCommands(size_t num_bytes,
34
+ size_t position,
35
+ const uint8_t* ringbuffer,
36
+ size_t ringbuffer_mask,
37
+ const Command* commands,
38
+ int num_commands,
39
+ int last_insert_len) {
40
+ std::vector<int> histogram_literal(256, 0);
41
+ std::vector<int> histogram_cmd(kNumCommandPrefixes, 0);
42
+ std::vector<int> histogram_dist(kNumDistancePrefixes, 0);
43
+
44
+ size_t pos = position - last_insert_len;
45
+ for (int i = 0; i < num_commands; i++) {
46
+ int inslength = commands[i].insert_len_;
47
+ int copylength = commands[i].copy_len_;
48
+ int distcode = commands[i].dist_prefix_;
49
+ int cmdcode = commands[i].cmd_prefix_;
50
+
51
+ histogram_cmd[cmdcode]++;
52
+ if (cmdcode >= 128) histogram_dist[distcode]++;
53
+
54
+ for (int j = 0; j < inslength; j++) {
55
+ histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
56
+ }
57
+
58
+ pos += inslength + copylength;
59
+ }
60
+
61
+ std::vector<double> cost_literal;
62
+ Set(histogram_literal, &cost_literal);
63
+ Set(histogram_cmd, &cost_cmd_);
64
+ Set(histogram_dist, &cost_dist_);
65
+
66
+ min_cost_cmd_ = kInfinity;
67
+ for (int i = 0; i < kNumCommandPrefixes; ++i) {
68
+ min_cost_cmd_ = std::min(min_cost_cmd_, cost_cmd_[i]);
69
+ }
70
+
71
+ literal_costs_.resize(num_bytes + 1);
72
+ literal_costs_[0] = 0.0;
73
+ for (int i = 0; i < num_bytes; ++i) {
74
+ literal_costs_[i + 1] = literal_costs_[i] +
75
+ cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
76
+ }
77
+ }
78
+
79
+ void SetFromLiteralCosts(size_t num_bytes,
80
+ size_t position,
81
+ const float* literal_cost,
82
+ size_t literal_cost_mask) {
83
+ literal_costs_.resize(num_bytes + 1);
84
+ literal_costs_[0] = 0.0;
85
+ if (literal_cost) {
86
+ for (int i = 0; i < num_bytes; ++i) {
87
+ literal_costs_[i + 1] = literal_costs_[i] +
88
+ literal_cost[(position + i) & literal_cost_mask];
89
+ }
90
+ } else {
91
+ for (int i = 1; i <= num_bytes; ++i) {
92
+ literal_costs_[i] = i * 5.4;
93
+ }
94
+ }
95
+ cost_cmd_.resize(kNumCommandPrefixes);
96
+ cost_dist_.resize(kNumDistancePrefixes);
97
+ for (int i = 0; i < kNumCommandPrefixes; ++i) {
98
+ cost_cmd_[i] = FastLog2(11 + i);
99
+ }
100
+ for (int i = 0; i < kNumDistancePrefixes; ++i) {
101
+ cost_dist_[i] = FastLog2(20 + i);
102
+ }
103
+ min_cost_cmd_ = FastLog2(11);
104
+ }
105
+
106
+ double GetCommandCost(
107
+ int dist_code, int length_code, int insert_length) const {
108
+ int inscode = GetInsertLengthCode(insert_length);
109
+ int copycode = GetCopyLengthCode(length_code);
110
+ uint16_t cmdcode = CombineLengthCodes(inscode, copycode, dist_code);
111
+ uint64_t insnumextra = insextra[inscode];
112
+ uint64_t copynumextra = copyextra[copycode];
113
+ uint16_t dist_symbol;
114
+ uint32_t distextra;
115
+ PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
116
+ uint32_t distnumextra = distextra >> 24;
117
+
118
+ double result = insnumextra + copynumextra + distnumextra;
119
+ result += cost_cmd_[cmdcode];
120
+ if (cmdcode >= 128) result += cost_dist_[dist_symbol];
121
+ return result;
122
+ }
123
+
124
+ double GetLiteralCosts(int from, int to) const {
125
+ return literal_costs_[to] - literal_costs_[from];
126
+ }
127
+
128
+ double GetMinCostCmd() const {
129
+ return min_cost_cmd_;
130
+ }
131
+
132
+ private:
133
+ void Set(const std::vector<int>& histogram, std::vector<double>* cost) {
134
+ cost->resize(histogram.size());
135
+ int sum = 0;
136
+ for (size_t i = 0; i < histogram.size(); i++) {
137
+ sum += histogram[i];
138
+ }
139
+ double log2sum = FastLog2(sum);
140
+ for (size_t i = 0; i < histogram.size(); i++) {
141
+ if (histogram[i] == 0) {
142
+ (*cost)[i] = log2sum + 2;
143
+ continue;
144
+ }
145
+
146
+ // Shannon bits for this symbol.
147
+ (*cost)[i] = log2sum - FastLog2(histogram[i]);
148
+
149
+ // Cannot be coded with less than 1 bit
150
+ if ((*cost)[i] < 1) (*cost)[i] = 1;
151
+ }
152
+ }
153
+
154
+ std::vector<double> cost_cmd_; // The insert and copy length symbols.
155
+ std::vector<double> cost_dist_;
156
+ // Cumulative costs of literals per position in the stream.
157
+ std::vector<double> literal_costs_;
158
+ double min_cost_cmd_;
159
+ };
160
+
161
+ inline void SetDistanceCache(int distance,
162
+ int distance_code,
163
+ int max_distance,
164
+ const int* dist_cache,
165
+ int* result_dist_cache) {
166
+ if (distance <= max_distance && distance_code > 0) {
167
+ result_dist_cache[0] = distance;
168
+ memcpy(&result_dist_cache[1], dist_cache, 3 * sizeof(dist_cache[0]));
169
+ } else {
170
+ memcpy(result_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
171
+ }
172
+ }
173
+
174
+ inline int ComputeDistanceCode(int distance,
175
+ int max_distance,
176
+ int quality,
177
+ const int* dist_cache) {
178
+ if (distance <= max_distance) {
179
+ if (distance == dist_cache[0]) {
180
+ return 0;
181
+ } else if (distance == dist_cache[1]) {
182
+ return 1;
183
+ } else if (distance == dist_cache[2]) {
184
+ return 2;
185
+ } else if (distance == dist_cache[3]) {
186
+ return 3;
187
+ } else if (quality > 3 && distance >= 6) {
188
+ for (int k = 4; k < kNumDistanceShortCodes; ++k) {
189
+ int idx = kDistanceCacheIndex[k];
190
+ int candidate = dist_cache[idx] + kDistanceCacheOffset[k];
191
+ static const int kLimits[16] = { 0, 0, 0, 0,
192
+ 6, 6, 11, 11,
193
+ 11, 11, 11, 11,
194
+ 12, 12, 12, 12 };
195
+ if (distance == candidate && distance >= kLimits[k]) {
196
+ return k;
197
+ }
198
+ }
199
+ }
200
+ }
201
+ return distance + 15;
202
+ }
203
+
204
+ struct ZopfliNode {
205
+ ZopfliNode() : length(1),
206
+ distance(0),
207
+ distance_code(0),
208
+ length_code(0),
209
+ insert_length(0),
210
+ cost(kInfinity) {}
211
+
212
+ // best length to get up to this byte (not including this byte itself)
213
+ int length;
214
+ // distance associated with the length
215
+ int distance;
216
+ int distance_code;
217
+ int distance_cache[4];
218
+ // length code associated with the length - usually the same as length,
219
+ // except in case of length-changing dictionary transformation.
220
+ int length_code;
221
+ // number of literal inserts before this copy
222
+ int insert_length;
223
+ // smallest cost to get to this byte from the beginning, as found so far
224
+ double cost;
225
+ };
226
+
227
+ inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
228
+ int len, int len_code, int dist, int dist_code,
229
+ int max_dist, const int* dist_cache,
230
+ double cost) {
231
+ ZopfliNode& next = nodes[pos + len];
232
+ next.length = len;
233
+ next.length_code = len_code;
234
+ next.distance = dist;
235
+ next.distance_code = dist_code;
236
+ next.insert_length = pos - start_pos;
237
+ next.cost = cost;
238
+ SetDistanceCache(dist, dist_code, max_dist, dist_cache,
239
+ &next.distance_cache[0]);
240
+ }
241
+
242
+ // Maintains the smallest 2^k cost difference together with their positions
243
+ class StartPosQueue {
244
+ public:
245
+ explicit StartPosQueue(int bits)
246
+ : mask_((1 << bits) - 1), q_(1 << bits), idx_(0) {}
247
+
248
+ void Clear() {
249
+ idx_ = 0;
250
+ }
251
+
252
+ void Push(size_t pos, double costdiff) {
253
+ if (costdiff == kInfinity) {
254
+ // We can't start a command from an unreachable start position.
255
+ // E.g. position 1 in a stream is always unreachable, because all commands
256
+ // have a copy of at least length 2.
257
+ return;
258
+ }
259
+ q_[idx_ & mask_] = std::make_pair(pos, costdiff);
260
+ // Restore the sorted order.
261
+ for (int i = idx_; i > 0 && i > idx_ - mask_; --i) {
262
+ if (q_[i & mask_].second > q_[(i - 1) & mask_].second) {
263
+ std::swap(q_[i & mask_], q_[(i - 1) & mask_]);
264
+ }
265
+ }
266
+ ++idx_;
267
+ }
268
+
269
+ int size() const { return std::min<int>(idx_, mask_ + 1); }
270
+
271
+ size_t GetStartPos(int k) const {
272
+ return q_[(idx_ - k - 1) & mask_].first;
273
+ }
274
+
275
+ private:
276
+ const int mask_;
277
+ std::vector<std::pair<size_t, double> > q_;
278
+ int idx_;
279
+ };
280
+
281
+ // Returns the minimum possible copy length that can improve the cost of any
282
+ // future position.
283
+ int ComputeMinimumCopyLength(const StartPosQueue& queue,
284
+ const std::vector<ZopfliNode>& nodes,
285
+ const ZopfliCostModel& model,
286
+ size_t pos,
287
+ double min_cost_cmd) {
288
+ // Compute the minimum possible cost of reaching any future position.
289
+ const size_t start0 = queue.GetStartPos(0);
290
+ double min_cost = (nodes[start0].cost +
291
+ model.GetLiteralCosts(start0, pos) +
292
+ min_cost_cmd);
293
+ int len = 2;
294
+ int next_len_bucket = 4;
295
+ int next_len_offset = 10;
296
+ while (pos + len < nodes.size() && nodes[pos + len].cost <= min_cost) {
297
+ // We already reached (pos + len) with no more cost than the minimum
298
+ // possible cost of reaching anything from this pos, so there is no point in
299
+ // looking for lengths <= len.
300
+ ++len;
301
+ if (len == next_len_offset) {
302
+ // We reached the next copy length code bucket, so we add one more
303
+ // extra bit to the minimum cost.
304
+ min_cost += 1.0;
305
+ next_len_offset += next_len_bucket;
306
+ next_len_bucket *= 2;
307
+ }
308
+ }
309
+ return len;
310
+ }
311
+
312
+ void ZopfliIterate(size_t num_bytes,
313
+ size_t position,
314
+ const uint8_t* ringbuffer,
315
+ size_t ringbuffer_mask,
316
+ const size_t max_backward_limit,
317
+ const ZopfliCostModel& model,
318
+ const std::vector<int>& num_matches,
319
+ const std::vector<BackwardMatch>& matches,
320
+ int* dist_cache,
321
+ int* last_insert_len,
322
+ Command* commands,
323
+ int* num_commands,
324
+ int* num_literals) {
325
+ const Command * const orig_commands = commands;
326
+
327
+ std::vector<ZopfliNode> nodes(num_bytes + 1);
328
+ nodes[0].length = 0;
329
+ nodes[0].cost = 0;
330
+ memcpy(nodes[0].distance_cache, dist_cache, 4 * sizeof(dist_cache[0]));
331
+
332
+ StartPosQueue queue(3);
333
+ const double min_cost_cmd = model.GetMinCostCmd();
334
+
335
+ size_t cur_match_pos = 0;
336
+ for (size_t i = 0; i + 3 < num_bytes; i++) {
337
+ size_t cur_ix = position + i;
338
+ size_t cur_ix_masked = cur_ix & ringbuffer_mask;
339
+ size_t max_distance = std::min(cur_ix, max_backward_limit);
340
+ int max_length = num_bytes - i;
341
+
342
+ queue.Push(i, nodes[i].cost - model.GetLiteralCosts(0, i));
343
+
344
+ const int min_len = ComputeMinimumCopyLength(queue, nodes, model,
345
+ i, min_cost_cmd);
346
+
347
+ // Go over the command starting positions in order of increasing cost
348
+ // difference.
349
+ for (size_t k = 0; k < 5 && k < queue.size(); ++k) {
350
+ const size_t start = queue.GetStartPos(k);
351
+ const double start_costdiff =
352
+ nodes[start].cost - model.GetLiteralCosts(0, start);
353
+ const int* dist_cache2 = &nodes[start].distance_cache[0];
354
+
355
+ // Look for last distance matches using the distance cache from this
356
+ // starting position.
357
+ int best_len = min_len - 1;
358
+ for (int j = 0; j < kNumDistanceShortCodes; ++j) {
359
+ const int idx = kDistanceCacheIndex[j];
360
+ const int backward = dist_cache2[idx] + kDistanceCacheOffset[j];
361
+ size_t prev_ix = cur_ix - backward;
362
+ if (prev_ix >= cur_ix) {
363
+ continue;
364
+ }
365
+ if (PREDICT_FALSE(backward > max_distance)) {
366
+ continue;
367
+ }
368
+ prev_ix &= ringbuffer_mask;
369
+
370
+ if (cur_ix_masked + best_len > ringbuffer_mask ||
371
+ prev_ix + best_len > ringbuffer_mask ||
372
+ ringbuffer[cur_ix_masked + best_len] !=
373
+ ringbuffer[prev_ix + best_len]) {
374
+ continue;
375
+ }
376
+ const size_t len =
377
+ FindMatchLengthWithLimit(&ringbuffer[prev_ix],
378
+ &ringbuffer[cur_ix_masked],
379
+ max_length);
380
+ for (int l = best_len + 1; l <= len; ++l) {
381
+ double cmd_cost = model.GetCommandCost(j, l, i - start);
382
+ double cost = start_costdiff + cmd_cost + model.GetLiteralCosts(0, i);
383
+ if (cost < nodes[i + l].cost) {
384
+ UpdateZopfliNode(&nodes[0], i, start, l, l, backward, j,
385
+ max_distance, dist_cache2, cost);
386
+ }
387
+ best_len = l;
388
+ }
389
+ }
390
+
391
+ // At higher iterations look only for new last distance matches, since
392
+ // looking only for new command start positions with the same distances
393
+ // does not help much.
394
+ if (k >= 2) continue;
395
+
396
+ // Loop through all possible copy lengths at this position.
397
+ int len = min_len;
398
+ for (int j = 0; j < num_matches[i]; ++j) {
399
+ BackwardMatch match = matches[cur_match_pos + j];
400
+ int dist = match.distance;
401
+ bool is_dictionary_match = dist > max_distance;
402
+ // We already tried all possible last distance matches, so we can use
403
+ // normal distance code here.
404
+ int dist_code = dist + 15;
405
+ // Try all copy lengths up until the maximum copy length corresponding
406
+ // to this distance. If the distance refers to the static dictionary, or
407
+ // the maximum length is long enough, try only one maximum length.
408
+ int max_len = match.length();
409
+ if (len < max_len && (is_dictionary_match || max_len > kMaxZopfliLen)) {
410
+ len = max_len;
411
+ }
412
+ for (; len <= max_len; ++len) {
413
+ int len_code = is_dictionary_match ? match.length_code() : len;
414
+ double cmd_cost =
415
+ model.GetCommandCost(dist_code, len_code, i - start);
416
+ double cost = start_costdiff + cmd_cost + model.GetLiteralCosts(0, i);
417
+ if (cost < nodes[i + len].cost) {
418
+ UpdateZopfliNode(&nodes[0], i, start, len, len_code, dist,
419
+ dist_code, max_distance, dist_cache2, cost);
420
+ }
421
+ }
422
+ }
423
+ }
424
+
425
+ cur_match_pos += num_matches[i];
426
+
427
+ // The zopflification can be too slow in case of very long lengths, so in
428
+ // such case skip it all, it does not cost a lot of compression ratio.
429
+ if (num_matches[i] == 1 &&
430
+ matches[cur_match_pos - 1].length() > kMaxZopfliLen) {
431
+ i += matches[cur_match_pos - 1].length() - 1;
432
+ queue.Clear();
433
+ }
434
+ }
435
+
436
+ std::vector<int> backwards;
437
+ size_t index = num_bytes;
438
+ while (nodes[index].cost == kInfinity) --index;
439
+ while (index > 0) {
440
+ int len = nodes[index].length + nodes[index].insert_length;
441
+ backwards.push_back(len);
442
+ index -= len;
443
+ }
444
+
445
+ std::vector<int> path;
446
+ for (size_t i = backwards.size(); i > 0; i--) {
447
+ path.push_back(backwards[i - 1]);
448
+ }
449
+
450
+ size_t pos = 0;
451
+ for (size_t i = 0; i < path.size(); i++) {
452
+ const ZopfliNode& next = nodes[pos + path[i]];
453
+ int copy_length = next.length;
454
+ int insert_length = next.insert_length;
455
+ pos += insert_length;
456
+ if (i == 0) {
457
+ insert_length += *last_insert_len;
458
+ *last_insert_len = 0;
459
+ }
460
+ int distance = next.distance;
461
+ int len_code = next.length_code;
462
+ size_t max_distance = std::min(position + pos, max_backward_limit);
463
+ bool is_dictionary = (distance > max_distance);
464
+ int dist_code = next.distance_code;
465
+
466
+ Command cmd(insert_length, copy_length, len_code, dist_code);
467
+ *commands++ = cmd;
468
+
469
+ if (!is_dictionary && dist_code > 0) {
470
+ dist_cache[3] = dist_cache[2];
471
+ dist_cache[2] = dist_cache[1];
472
+ dist_cache[1] = dist_cache[0];
473
+ dist_cache[0] = distance;
474
+ }
475
+
476
+ *num_literals += insert_length;
477
+ insert_length = 0;
478
+ pos += copy_length;
479
+ }
480
+ *last_insert_len += num_bytes - pos;
481
+ *num_commands += (commands - orig_commands);
482
+ }
483
+
484
+ template<typename Hasher>
485
+ void CreateBackwardReferences(size_t num_bytes,
486
+ size_t position,
487
+ const uint8_t* ringbuffer,
488
+ size_t ringbuffer_mask,
489
+ const size_t max_backward_limit,
490
+ const int quality,
491
+ Hasher* hasher,
492
+ int* dist_cache,
493
+ int* last_insert_len,
494
+ Command* commands,
495
+ int* num_commands,
496
+ int* num_literals) {
497
+ if (num_bytes >= 3 && position >= 3) {
498
+ // Prepare the hashes for three last bytes of the last write.
499
+ // These could not be calculated before, since they require knowledge
500
+ // of both the previous and the current block.
501
+ hasher->Store(&ringbuffer[(position - 3) & ringbuffer_mask],
502
+ position - 3);
503
+ hasher->Store(&ringbuffer[(position - 2) & ringbuffer_mask],
504
+ position - 2);
505
+ hasher->Store(&ringbuffer[(position - 1) & ringbuffer_mask],
506
+ position - 1);
507
+ }
508
+ const Command * const orig_commands = commands;
509
+ int insert_length = *last_insert_len;
510
+ size_t i = position & ringbuffer_mask;
511
+ const int i_diff = position - i;
512
+ const size_t i_end = i + num_bytes;
513
+
514
+ // For speed up heuristics for random data.
515
+ const int random_heuristics_window_size = quality < 9 ? 64 : 512;
516
+ int apply_random_heuristics = i + random_heuristics_window_size;
517
+
518
+ // Minimum score to accept a backward reference.
519
+ const int kMinScore = 4.0;
520
+
521
+ while (i + Hasher::kHashTypeLength - 1 < i_end) {
522
+ int max_length = i_end - i;
523
+ size_t max_distance = std::min(i + i_diff, max_backward_limit);
524
+ int best_len = 0;
525
+ int best_len_code = 0;
526
+ int best_dist = 0;
527
+ double best_score = kMinScore;
528
+ bool match_found = hasher->FindLongestMatch(
529
+ ringbuffer, ringbuffer_mask,
530
+ dist_cache, i + i_diff, max_length, max_distance,
531
+ &best_len, &best_len_code, &best_dist, &best_score);
532
+ if (match_found) {
533
+ // Found a match. Let's look for something even better ahead.
534
+ int delayed_backward_references_in_row = 0;
535
+ for (;;) {
536
+ --max_length;
537
+ int best_len_2 = quality < 5 ? std::min(best_len - 1, max_length) : 0;
538
+ int best_len_code_2 = 0;
539
+ int best_dist_2 = 0;
540
+ double best_score_2 = kMinScore;
541
+ max_distance = std::min(i + i_diff + 1, max_backward_limit);
542
+ hasher->Store(ringbuffer + i, i + i_diff);
543
+ match_found = hasher->FindLongestMatch(
544
+ ringbuffer, ringbuffer_mask,
545
+ dist_cache, i + i_diff + 1, max_length, max_distance,
546
+ &best_len_2, &best_len_code_2, &best_dist_2, &best_score_2);
547
+ double cost_diff_lazy = 7.0;
548
+ if (match_found && best_score_2 >= best_score + cost_diff_lazy) {
549
+ // Ok, let's just write one byte for now and start a match from the
550
+ // next byte.
551
+ ++i;
552
+ ++insert_length;
553
+ best_len = best_len_2;
554
+ best_len_code = best_len_code_2;
555
+ best_dist = best_dist_2;
556
+ best_score = best_score_2;
557
+ if (++delayed_backward_references_in_row < 4) {
558
+ continue;
559
+ }
560
+ }
561
+ break;
562
+ }
563
+ apply_random_heuristics =
564
+ i + 2 * best_len + random_heuristics_window_size;
565
+ max_distance = std::min(i + i_diff, max_backward_limit);
566
+ // The first 16 codes are special shortcodes, and the minimum offset is 1.
567
+ int distance_code =
568
+ ComputeDistanceCode(best_dist, max_distance, quality, dist_cache);
569
+ if (best_dist <= max_distance && distance_code > 0) {
570
+ dist_cache[3] = dist_cache[2];
571
+ dist_cache[2] = dist_cache[1];
572
+ dist_cache[1] = dist_cache[0];
573
+ dist_cache[0] = best_dist;
574
+ }
575
+ Command cmd(insert_length, best_len, best_len_code, distance_code);
576
+ *commands++ = cmd;
577
+ *num_literals += insert_length;
578
+ insert_length = 0;
579
+ // Put the hash keys into the table, if there are enough
580
+ // bytes left.
581
+ for (int j = 1; j < best_len; ++j) {
582
+ hasher->Store(&ringbuffer[i + j], i + i_diff + j);
583
+ }
584
+ i += best_len;
585
+ } else {
586
+ ++insert_length;
587
+ hasher->Store(ringbuffer + i, i + i_diff);
588
+ ++i;
589
+ // If we have not seen matches for a long time, we can skip some
590
+ // match lookups. Unsuccessful match lookups are very very expensive
591
+ // and this kind of a heuristic speeds up compression quite
592
+ // a lot.
593
+ if (i > apply_random_heuristics) {
594
+ // Going through uncompressible data, jump.
595
+ if (i > apply_random_heuristics + 4 * random_heuristics_window_size) {
596
+ // It is quite a long time since we saw a copy, so we assume
597
+ // that this data is not compressible, and store hashes less
598
+ // often. Hashes of non compressible data are less likely to
599
+ // turn out to be useful in the future, too, so we store less of
600
+ // them to not to flood out the hash table of good compressible
601
+ // data.
602
+ int i_jump = std::min(i + 16, i_end - 4);
603
+ for (; i < i_jump; i += 4) {
604
+ hasher->Store(ringbuffer + i, i + i_diff);
605
+ insert_length += 4;
606
+ }
607
+ } else {
608
+ int i_jump = std::min(i + 8, i_end - 3);
609
+ for (; i < i_jump; i += 2) {
610
+ hasher->Store(ringbuffer + i, i + i_diff);
611
+ insert_length += 2;
612
+ }
613
+ }
614
+ }
615
+ }
616
+ }
617
+ insert_length += (i_end - i);
618
+ *last_insert_len = insert_length;
619
+ *num_commands += (commands - orig_commands);
620
+ }
621
+
622
+ void CreateBackwardReferences(size_t num_bytes,
623
+ size_t position,
624
+ const uint8_t* ringbuffer,
625
+ size_t ringbuffer_mask,
626
+ const float* literal_cost,
627
+ size_t literal_cost_mask,
628
+ const size_t max_backward_limit,
629
+ const int quality,
630
+ Hashers* hashers,
631
+ int hash_type,
632
+ int* dist_cache,
633
+ int* last_insert_len,
634
+ Command* commands,
635
+ int* num_commands,
636
+ int* num_literals) {
637
+ bool zopflify = quality > 9;
638
+ if (zopflify) {
639
+ Hashers::H9* hasher = hashers->hash_h9.get();
640
+ if (num_bytes >= 3 && position >= 3) {
641
+ // Prepare the hashes for three last bytes of the last write.
642
+ // These could not be calculated before, since they require knowledge
643
+ // of both the previous and the current block.
644
+ hasher->Store(&ringbuffer[(position - 3) & ringbuffer_mask],
645
+ position - 3);
646
+ hasher->Store(&ringbuffer[(position - 2) & ringbuffer_mask],
647
+ position - 2);
648
+ hasher->Store(&ringbuffer[(position - 1) & ringbuffer_mask],
649
+ position - 1);
650
+ }
651
+ std::vector<int> num_matches(num_bytes);
652
+ std::vector<BackwardMatch> matches(3 * num_bytes);
653
+ size_t cur_match_pos = 0;
654
+ for (size_t i = 0; i + 3 < num_bytes; ++i) {
655
+ size_t max_distance = std::min(position + i, max_backward_limit);
656
+ int max_length = num_bytes - i;
657
+ // Ensure that we have at least kMaxZopfliLen free slots.
658
+ if (matches.size() < cur_match_pos + kMaxZopfliLen) {
659
+ matches.resize(cur_match_pos + kMaxZopfliLen);
660
+ }
661
+ hasher->FindAllMatches(
662
+ ringbuffer, ringbuffer_mask,
663
+ position + i, max_length, max_distance,
664
+ &num_matches[i], &matches[cur_match_pos]);
665
+ hasher->Store(&ringbuffer[(position + i) & ringbuffer_mask],
666
+ position + i);
667
+ cur_match_pos += num_matches[i];
668
+ if (num_matches[i] == 1) {
669
+ const int match_len = matches[cur_match_pos - 1].length();
670
+ if (match_len > kMaxZopfliLen) {
671
+ for (int j = 1; j < match_len; ++j) {
672
+ ++i;
673
+ hasher->Store(
674
+ &ringbuffer[(position + i) & ringbuffer_mask], position + i);
675
+ num_matches[i] = 0;
676
+ }
677
+ }
678
+ }
679
+ }
680
+ int orig_num_literals = *num_literals;
681
+ int orig_last_insert_len = *last_insert_len;
682
+ int orig_dist_cache[4] = {
683
+ dist_cache[0], dist_cache[1], dist_cache[2], dist_cache[3]
684
+ };
685
+ int orig_num_commands = *num_commands;
686
+ static const int kIterations = 2;
687
+ for (int i = 0; i < kIterations; i++) {
688
+ ZopfliCostModel model;
689
+ if (i == 0) {
690
+ model.SetFromLiteralCosts(num_bytes, position,
691
+ literal_cost, literal_cost_mask);
692
+ } else {
693
+ model.SetFromCommands(num_bytes, position,
694
+ ringbuffer, ringbuffer_mask,
695
+ commands, *num_commands - orig_num_commands,
696
+ orig_last_insert_len);
697
+ }
698
+ *num_commands = orig_num_commands;
699
+ *num_literals = orig_num_literals;
700
+ *last_insert_len = orig_last_insert_len;
701
+ memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
702
+ ZopfliIterate(num_bytes, position, ringbuffer, ringbuffer_mask,
703
+ max_backward_limit, model, num_matches, matches, dist_cache,
704
+ last_insert_len, commands, num_commands, num_literals);
705
+ }
706
+ return;
707
+ }
708
+
709
+ switch (hash_type) {
710
+ case 1:
711
+ CreateBackwardReferences<Hashers::H1>(
712
+ num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
713
+ quality, hashers->hash_h1.get(), dist_cache, last_insert_len,
714
+ commands, num_commands, num_literals);
715
+ break;
716
+ case 2:
717
+ CreateBackwardReferences<Hashers::H2>(
718
+ num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
719
+ quality, hashers->hash_h2.get(), dist_cache, last_insert_len,
720
+ commands, num_commands, num_literals);
721
+ break;
722
+ case 3:
723
+ CreateBackwardReferences<Hashers::H3>(
724
+ num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
725
+ quality, hashers->hash_h3.get(), dist_cache, last_insert_len,
726
+ commands, num_commands, num_literals);
727
+ break;
728
+ case 4:
729
+ CreateBackwardReferences<Hashers::H4>(
730
+ num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
731
+ quality, hashers->hash_h4.get(), dist_cache, last_insert_len,
732
+ commands, num_commands, num_literals);
733
+ break;
734
+ case 5:
735
+ CreateBackwardReferences<Hashers::H5>(
736
+ num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
737
+ quality, hashers->hash_h5.get(), dist_cache, last_insert_len,
738
+ commands, num_commands, num_literals);
739
+ break;
740
+ case 6:
741
+ CreateBackwardReferences<Hashers::H6>(
742
+ num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
743
+ quality, hashers->hash_h6.get(), dist_cache, last_insert_len,
744
+ commands, num_commands, num_literals);
745
+ break;
746
+ case 7:
747
+ CreateBackwardReferences<Hashers::H7>(
748
+ num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
749
+ quality, hashers->hash_h7.get(), dist_cache, last_insert_len,
750
+ commands, num_commands, num_literals);
751
+ break;
752
+ case 8:
753
+ CreateBackwardReferences<Hashers::H8>(
754
+ num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
755
+ quality, hashers->hash_h8.get(), dist_cache, last_insert_len,
756
+ commands, num_commands, num_literals);
757
+ break;
758
+ case 9:
759
+ CreateBackwardReferences<Hashers::H9>(
760
+ num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
761
+ quality, hashers->hash_h9.get(), dist_cache, last_insert_len,
762
+ commands, num_commands, num_literals);
763
+ break;
764
+ default:
765
+ break;
766
+ }
767
+ }
768
+
769
+ } // namespace brotli