extbrotli 0.0.1.PROTOTYPE
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +28 -0
- data/README.md +67 -0
- data/Rakefile +158 -0
- data/contrib/brotli/LICENSE +202 -0
- data/contrib/brotli/README.md +18 -0
- data/contrib/brotli/dec/bit_reader.c +55 -0
- data/contrib/brotli/dec/bit_reader.h +256 -0
- data/contrib/brotli/dec/context.h +260 -0
- data/contrib/brotli/dec/decode.c +1573 -0
- data/contrib/brotli/dec/decode.h +160 -0
- data/contrib/brotli/dec/dictionary.h +9494 -0
- data/contrib/brotli/dec/huffman.c +325 -0
- data/contrib/brotli/dec/huffman.h +77 -0
- data/contrib/brotli/dec/port.h +148 -0
- data/contrib/brotli/dec/prefix.h +756 -0
- data/contrib/brotli/dec/state.c +149 -0
- data/contrib/brotli/dec/state.h +185 -0
- data/contrib/brotli/dec/streams.c +99 -0
- data/contrib/brotli/dec/streams.h +100 -0
- data/contrib/brotli/dec/transform.h +315 -0
- data/contrib/brotli/dec/types.h +36 -0
- data/contrib/brotli/enc/backward_references.cc +769 -0
- data/contrib/brotli/enc/backward_references.h +50 -0
- data/contrib/brotli/enc/bit_cost.h +147 -0
- data/contrib/brotli/enc/block_splitter.cc +418 -0
- data/contrib/brotli/enc/block_splitter.h +78 -0
- data/contrib/brotli/enc/brotli_bit_stream.cc +884 -0
- data/contrib/brotli/enc/brotli_bit_stream.h +149 -0
- data/contrib/brotli/enc/cluster.h +290 -0
- data/contrib/brotli/enc/command.h +140 -0
- data/contrib/brotli/enc/context.h +185 -0
- data/contrib/brotli/enc/dictionary.h +9485 -0
- data/contrib/brotli/enc/dictionary_hash.h +4125 -0
- data/contrib/brotli/enc/encode.cc +715 -0
- data/contrib/brotli/enc/encode.h +196 -0
- data/contrib/brotli/enc/encode_parallel.cc +354 -0
- data/contrib/brotli/enc/encode_parallel.h +37 -0
- data/contrib/brotli/enc/entropy_encode.cc +492 -0
- data/contrib/brotli/enc/entropy_encode.h +88 -0
- data/contrib/brotli/enc/fast_log.h +179 -0
- data/contrib/brotli/enc/find_match_length.h +87 -0
- data/contrib/brotli/enc/hash.h +686 -0
- data/contrib/brotli/enc/histogram.cc +76 -0
- data/contrib/brotli/enc/histogram.h +100 -0
- data/contrib/brotli/enc/literal_cost.cc +172 -0
- data/contrib/brotli/enc/literal_cost.h +38 -0
- data/contrib/brotli/enc/metablock.cc +544 -0
- data/contrib/brotli/enc/metablock.h +88 -0
- data/contrib/brotli/enc/port.h +151 -0
- data/contrib/brotli/enc/prefix.h +85 -0
- data/contrib/brotli/enc/ringbuffer.h +108 -0
- data/contrib/brotli/enc/static_dict.cc +441 -0
- data/contrib/brotli/enc/static_dict.h +40 -0
- data/contrib/brotli/enc/static_dict_lut.h +12063 -0
- data/contrib/brotli/enc/streams.cc +127 -0
- data/contrib/brotli/enc/streams.h +129 -0
- data/contrib/brotli/enc/transform.h +250 -0
- data/contrib/brotli/enc/write_bits.h +91 -0
- data/ext/extbrotli.cc +24 -0
- data/ext/extbrotli.h +73 -0
- data/ext/extconf.rb +35 -0
- data/ext/lldecoder.c +220 -0
- data/ext/llencoder.cc +433 -0
- data/gemstub.rb +21 -0
- data/lib/extbrotli.rb +243 -0
- data/lib/extbrotli/version.rb +3 -0
- metadata +140 -0
@@ -0,0 +1,315 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
*/
|
15
|
+
|
16
|
+
/* Transformations on dictionary words. */
|
17
|
+
|
18
|
+
#ifndef BROTLI_DEC_TRANSFORM_H_
|
19
|
+
#define BROTLI_DEC_TRANSFORM_H_
|
20
|
+
|
21
|
+
#include <stdio.h>
|
22
|
+
#include <ctype.h>
|
23
|
+
#include "./port.h"
|
24
|
+
#include "./types.h"
|
25
|
+
|
26
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
27
|
+
extern "C" {
|
28
|
+
#endif
|
29
|
+
|
30
|
+
/* Elementary operations that can be applied to a dictionary word before the
   prefix and suffix are attached.

   The numeric values are significant and must not be renumbered:
   TransformDictionaryWord() subtracts a kOmitLastN value directly from the
   word length, and derives the number of leading bytes to skip from
   kOmitFirstN - (kOmitFirst1 - 1). */
enum WordTransformType {
  /* Copy the word unchanged. */
  kIdentity       = 0,

  /* Drop the last N bytes of the word (value == N). */
  kOmitLast1      = 1,
  kOmitLast2      = 2,
  kOmitLast3      = 3,
  kOmitLast4      = 4,
  kOmitLast5      = 5,
  kOmitLast6      = 6,
  kOmitLast7      = 7,
  kOmitLast8      = 8,
  kOmitLast9      = 9,

  /* Run ToUpperCase() on the first character / on every character. */
  kUppercaseFirst = 10,
  kUppercaseAll   = 11,

  /* Drop the first N bytes of the word (value == N + 11). */
  kOmitFirst1     = 12,
  kOmitFirst2     = 13,
  kOmitFirst3     = 14,
  kOmitFirst4     = 15,
  kOmitFirst5     = 16,
  kOmitFirst6     = 17,
  kOmitFirst7     = 18,
  kOmitFirst8     = 19,
  kOmitFirst9     = 20
};
|
53
|
+
|
54
|
+
/* One row of the transform table.  TransformDictionaryWord() uses prefix_id
   and suffix_id as byte offsets into kPrefixSuffix (the kPFix_* constants)
   and interprets transform as a WordTransformType value. */
typedef struct {
  const uint8_t prefix_id;  /* offset of the string emitted before the word */
  const uint8_t transform;  /* WordTransformType applied to the word itself */
  const uint8_t suffix_id;  /* offset of the string emitted after the word */
} Transform;
|
59
|
+
|
60
|
+
/* All prefix and suffix strings used by the transforms, packed back to back
   and separated by NUL bytes so that &kPrefixSuffix[offset] is a C string.
   The offsets are named by the kPFix_* constants.  The final entry relies on
   the literal's implicit terminating NUL, which is why the array is declared
   with exactly 208 bytes. */
static const char kPrefixSuffix[208] =
    "\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0"
    " for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0"
    " is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. This \0"
    " not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous ";
|
65
|
+
|
66
|
+
/* Byte offsets of the individual NUL-terminated strings inside kPrefixSuffix.
   Each constant is the index of the first byte of its string.  Some constants
   point into the middle of a longer entry and share its tail (for example
   kPFix_SPtheSP = 9 lies inside the entry starting at kPFix_SPofSPtheSP = 6).

   Naming legend:
     EMPTY   = ""
     SP      = " "
     DQUOT   = "\""
     SQUOT   = "'"
     CLOSEBR = "]"
     OPEN    = "("
     SLASH   = "/"
     NBSP    = non-breaking space "\xc2\xa0"
*/
enum {
  kPFix_EMPTY       = 0,
  kPFix_SP          = 1,
  kPFix_COMMASP     = 3,
  kPFix_SPofSPtheSP = 6,
  kPFix_SPtheSP     = 9,
  kPFix_eSP         = 12,
  kPFix_SPofSP      = 15,
  kPFix_sSP         = 20,
  kPFix_DOT         = 23,
  kPFix_SPandSP     = 25,
  kPFix_SPinSP      = 31,
  kPFix_DQUOT       = 36,
  kPFix_SPtoSP      = 38,
  kPFix_DQUOTGT     = 43,
  kPFix_NEWLINE     = 46,
  kPFix_DOTSP       = 48,
  kPFix_CLOSEBR     = 51,
  kPFix_SPforSP     = 53,
  kPFix_SPaSP       = 59,
  kPFix_SPthatSP    = 63,
  kPFix_SQUOT       = 70,
  kPFix_SPwithSP    = 72,
  kPFix_SPfromSP    = 79,
  kPFix_SPbySP      = 86,
  kPFix_OPEN        = 91,
  kPFix_DOTSPTheSP  = 93,
  kPFix_SPonSP      = 100,
  kPFix_SPasSP      = 105,
  kPFix_SPisSP      = 110,
  kPFix_ingSP       = 115,
  kPFix_NEWLINETAB  = 120,
  kPFix_COLON       = 123,
  kPFix_edSP        = 125,
  kPFix_EQDQUOT     = 129,
  kPFix_SPatSP      = 132,
  kPFix_lySP        = 137,
  kPFix_COMMA       = 141,
  kPFix_EQSQUOT     = 143,
  kPFix_DOTcomSLASH = 146,
  kPFix_DOTSPThisSP = 152,
  kPFix_SPnotSP     = 160,
  kPFix_erSP        = 166,
  kPFix_alSP        = 170,
  kPFix_fulSP       = 174,
  kPFix_iveSP       = 179,
  kPFix_lessSP      = 184,
  kPFix_estSP       = 190,
  kPFix_izeSP       = 195,
  kPFix_NBSP        = 200,
  kPFix_ousSP       = 203
};
|
127
|
+
|
128
|
+
|
129
|
+
static const Transform kTransforms[] = {
|
130
|
+
{ kPFix_EMPTY, kIdentity, kPFix_EMPTY },
|
131
|
+
{ kPFix_EMPTY, kIdentity, kPFix_SP },
|
132
|
+
{ kPFix_SP, kIdentity, kPFix_SP },
|
133
|
+
{ kPFix_EMPTY, kOmitFirst1, kPFix_EMPTY },
|
134
|
+
{ kPFix_EMPTY, kUppercaseFirst, kPFix_SP },
|
135
|
+
{ kPFix_EMPTY, kIdentity, kPFix_SPtheSP },
|
136
|
+
{ kPFix_SP, kIdentity, kPFix_EMPTY },
|
137
|
+
{ kPFix_sSP, kIdentity, kPFix_SP },
|
138
|
+
{ kPFix_EMPTY, kIdentity, kPFix_SPofSP },
|
139
|
+
{ kPFix_EMPTY, kUppercaseFirst, kPFix_EMPTY },
|
140
|
+
{ kPFix_EMPTY, kIdentity, kPFix_SPandSP },
|
141
|
+
{ kPFix_EMPTY, kOmitFirst2, kPFix_EMPTY },
|
142
|
+
{ kPFix_EMPTY, kOmitLast1, kPFix_EMPTY },
|
143
|
+
{ kPFix_COMMASP, kIdentity, kPFix_SP },
|
144
|
+
{ kPFix_EMPTY, kIdentity, kPFix_COMMASP },
|
145
|
+
{ kPFix_SP, kUppercaseFirst, kPFix_SP },
|
146
|
+
{ kPFix_EMPTY, kIdentity, kPFix_SPinSP },
|
147
|
+
{ kPFix_EMPTY, kIdentity, kPFix_SPtoSP },
|
148
|
+
{ kPFix_eSP, kIdentity, kPFix_SP },
|
149
|
+
{ kPFix_EMPTY, kIdentity, kPFix_DQUOT },
|
150
|
+
{ kPFix_EMPTY, kIdentity, kPFix_DOT },
|
151
|
+
{ kPFix_EMPTY, kIdentity, kPFix_DQUOTGT },
|
152
|
+
{ kPFix_EMPTY, kIdentity, kPFix_NEWLINE },
|
153
|
+
{ kPFix_EMPTY, kOmitLast3, kPFix_EMPTY },
|
154
|
+
{ kPFix_EMPTY, kIdentity, kPFix_CLOSEBR },
|
155
|
+
{ kPFix_EMPTY, kIdentity, kPFix_SPforSP },
|
156
|
+
{ kPFix_EMPTY, kOmitFirst3, kPFix_EMPTY },
|
157
|
+
{ kPFix_EMPTY, kOmitLast2, kPFix_EMPTY },
|
158
|
+
{ kPFix_EMPTY, kIdentity, kPFix_SPaSP },
|
159
|
+
{ kPFix_EMPTY, kIdentity, kPFix_SPthatSP },
|
160
|
+
{ kPFix_SP, kUppercaseFirst, kPFix_EMPTY },
|
161
|
+
{ kPFix_EMPTY, kIdentity, kPFix_DOTSP },
|
162
|
+
{ kPFix_DOT, kIdentity, kPFix_EMPTY },
|
163
|
+
{ kPFix_SP, kIdentity, kPFix_COMMASP },
|
164
|
+
{ kPFix_EMPTY, kOmitFirst4, kPFix_EMPTY },
|
165
|
+
{ kPFix_EMPTY, kIdentity, kPFix_SPwithSP },
|
166
|
+
{ kPFix_EMPTY, kIdentity, kPFix_SQUOT },
|
167
|
+
{ kPFix_EMPTY, kIdentity, kPFix_SPfromSP },
|
168
|
+
{ kPFix_EMPTY, kIdentity, kPFix_SPbySP },
|
169
|
+
{ kPFix_EMPTY, kOmitFirst5, kPFix_EMPTY },
|
170
|
+
{ kPFix_EMPTY, kOmitFirst6, kPFix_EMPTY },
|
171
|
+
{ kPFix_SPtheSP, kIdentity, kPFix_EMPTY },
|
172
|
+
{ kPFix_EMPTY, kOmitLast4, kPFix_EMPTY },
|
173
|
+
{ kPFix_EMPTY, kIdentity, kPFix_DOTSPTheSP },
|
174
|
+
{ kPFix_EMPTY, kUppercaseAll, kPFix_EMPTY },
|
175
|
+
{ kPFix_EMPTY, kIdentity, kPFix_SPonSP },
|
176
|
+
{ kPFix_EMPTY, kIdentity, kPFix_SPasSP },
|
177
|
+
{ kPFix_EMPTY, kIdentity, kPFix_SPisSP },
|
178
|
+
{ kPFix_EMPTY, kOmitLast7, kPFix_EMPTY },
|
179
|
+
{ kPFix_EMPTY, kOmitLast1, kPFix_ingSP },
|
180
|
+
{ kPFix_EMPTY, kIdentity, kPFix_NEWLINETAB },
|
181
|
+
{ kPFix_EMPTY, kIdentity, kPFix_COLON },
|
182
|
+
{ kPFix_SP, kIdentity, kPFix_DOTSP },
|
183
|
+
{ kPFix_EMPTY, kIdentity, kPFix_edSP },
|
184
|
+
{ kPFix_EMPTY, kOmitFirst9, kPFix_EMPTY },
|
185
|
+
{ kPFix_EMPTY, kOmitFirst7, kPFix_EMPTY },
|
186
|
+
{ kPFix_EMPTY, kOmitLast6, kPFix_EMPTY },
|
187
|
+
{ kPFix_EMPTY, kIdentity, kPFix_OPEN },
|
188
|
+
{ kPFix_EMPTY, kUppercaseFirst, kPFix_COMMASP },
|
189
|
+
{ kPFix_EMPTY, kOmitLast8, kPFix_EMPTY },
|
190
|
+
{ kPFix_EMPTY, kIdentity, kPFix_SPatSP },
|
191
|
+
{ kPFix_EMPTY, kIdentity, kPFix_lySP },
|
192
|
+
{ kPFix_SPtheSP, kIdentity, kPFix_SPofSP },
|
193
|
+
{ kPFix_EMPTY, kOmitLast5, kPFix_EMPTY },
|
194
|
+
{ kPFix_EMPTY, kOmitLast9, kPFix_EMPTY },
|
195
|
+
{ kPFix_SP, kUppercaseFirst, kPFix_COMMASP },
|
196
|
+
{ kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOT },
|
197
|
+
{ kPFix_DOT, kIdentity, kPFix_OPEN },
|
198
|
+
{ kPFix_EMPTY, kUppercaseAll, kPFix_SP },
|
199
|
+
{ kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOTGT },
|
200
|
+
{ kPFix_EMPTY, kIdentity, kPFix_EQDQUOT },
|
201
|
+
{ kPFix_SP, kIdentity, kPFix_DOT },
|
202
|
+
{ kPFix_DOTcomSLASH, kIdentity, kPFix_EMPTY },
|
203
|
+
{ kPFix_SPtheSP, kIdentity, kPFix_SPofSPtheSP },
|
204
|
+
{ kPFix_EMPTY, kUppercaseFirst, kPFix_SQUOT },
|
205
|
+
{ kPFix_EMPTY, kIdentity, kPFix_DOTSPThisSP },
|
206
|
+
{ kPFix_EMPTY, kIdentity, kPFix_COMMA },
|
207
|
+
{ kPFix_DOT, kIdentity, kPFix_SP },
|
208
|
+
{ kPFix_EMPTY, kUppercaseFirst, kPFix_OPEN },
|
209
|
+
{ kPFix_EMPTY, kUppercaseFirst, kPFix_DOT },
|
210
|
+
{ kPFix_EMPTY, kIdentity, kPFix_SPnotSP },
|
211
|
+
{ kPFix_SP, kIdentity, kPFix_EQDQUOT },
|
212
|
+
{ kPFix_EMPTY, kIdentity, kPFix_erSP },
|
213
|
+
{ kPFix_SP, kUppercaseAll, kPFix_SP },
|
214
|
+
{ kPFix_EMPTY, kIdentity, kPFix_alSP },
|
215
|
+
{ kPFix_SP, kUppercaseAll, kPFix_EMPTY },
|
216
|
+
{ kPFix_EMPTY, kIdentity, kPFix_EQSQUOT },
|
217
|
+
{ kPFix_EMPTY, kUppercaseAll, kPFix_DQUOT },
|
218
|
+
{ kPFix_EMPTY, kUppercaseFirst, kPFix_DOTSP },
|
219
|
+
{ kPFix_SP, kIdentity, kPFix_OPEN },
|
220
|
+
{ kPFix_EMPTY, kIdentity, kPFix_fulSP },
|
221
|
+
{ kPFix_SP, kUppercaseFirst, kPFix_DOTSP },
|
222
|
+
{ kPFix_EMPTY, kIdentity, kPFix_iveSP },
|
223
|
+
{ kPFix_EMPTY, kIdentity, kPFix_lessSP },
|
224
|
+
{ kPFix_EMPTY, kUppercaseAll, kPFix_SQUOT },
|
225
|
+
{ kPFix_EMPTY, kIdentity, kPFix_estSP },
|
226
|
+
{ kPFix_SP, kUppercaseFirst, kPFix_DOT },
|
227
|
+
{ kPFix_EMPTY, kUppercaseAll, kPFix_DQUOTGT },
|
228
|
+
{ kPFix_SP, kIdentity, kPFix_EQSQUOT },
|
229
|
+
{ kPFix_EMPTY, kUppercaseFirst, kPFix_COMMA },
|
230
|
+
{ kPFix_EMPTY, kIdentity, kPFix_izeSP },
|
231
|
+
{ kPFix_EMPTY, kUppercaseAll, kPFix_DOT },
|
232
|
+
{ kPFix_NBSP, kIdentity, kPFix_EMPTY },
|
233
|
+
{ kPFix_SP, kIdentity, kPFix_COMMA },
|
234
|
+
{ kPFix_EMPTY, kUppercaseFirst, kPFix_EQDQUOT },
|
235
|
+
{ kPFix_EMPTY, kUppercaseAll, kPFix_EQDQUOT },
|
236
|
+
{ kPFix_EMPTY, kIdentity, kPFix_ousSP },
|
237
|
+
{ kPFix_EMPTY, kUppercaseAll, kPFix_COMMASP },
|
238
|
+
{ kPFix_EMPTY, kUppercaseFirst, kPFix_EQSQUOT },
|
239
|
+
{ kPFix_SP, kUppercaseFirst, kPFix_COMMA },
|
240
|
+
{ kPFix_SP, kUppercaseAll, kPFix_EQDQUOT },
|
241
|
+
{ kPFix_SP, kUppercaseAll, kPFix_COMMASP },
|
242
|
+
{ kPFix_EMPTY, kUppercaseAll, kPFix_COMMA },
|
243
|
+
{ kPFix_EMPTY, kUppercaseAll, kPFix_OPEN },
|
244
|
+
{ kPFix_EMPTY, kUppercaseAll, kPFix_DOTSP },
|
245
|
+
{ kPFix_SP, kUppercaseAll, kPFix_DOT },
|
246
|
+
{ kPFix_EMPTY, kUppercaseAll, kPFix_EQSQUOT },
|
247
|
+
{ kPFix_SP, kUppercaseAll, kPFix_DOTSP },
|
248
|
+
{ kPFix_SP, kUppercaseFirst, kPFix_EQDQUOT },
|
249
|
+
{ kPFix_SP, kUppercaseAll, kPFix_EQSQUOT },
|
250
|
+
{ kPFix_SP, kUppercaseFirst, kPFix_EQSQUOT },
|
251
|
+
};
|
252
|
+
|
253
|
+
/* Number of rows in kTransforms, derived from the array itself so it cannot
   drift out of sync with the table. */
static const int kNumTransforms = sizeof(kTransforms) / sizeof(kTransforms[0]);
|
254
|
+
|
255
|
+
/* Applies the simplified upper-casing rule to the character starting at p,
   modifying the buffer in place, and returns how many bytes that character
   is treated as occupying (1, 2 or 3).

   The width is decided from the lead byte only:
     - below 0xc0: one byte; ASCII 'a'..'z' is flipped to upper case and
       every other value is left untouched;
     - 0xc0..0xdf: two bytes; bit 5 of the second byte is flipped;
     - 0xe0 and above: three bytes; the third byte is XORed with 5.
   No check is made that the trailing bytes exist, so the caller must ensure
   p[1] / p[2] are addressable in the multi-byte cases. */
static int ToUpperCase(uint8_t *p) {
  const uint8_t lead = p[0];

  if (lead >= 0xe0) {
    /* An arbitrary transform for three byte characters. */
    p[2] ^= 5;
    return 3;
  }
  if (lead >= 0xc0) {
    /* An overly simplified uppercasing model for utf-8. */
    p[1] ^= 32;
    return 2;
  }
  if ('a' <= lead && lead <= 'z') {
    p[0] = (uint8_t)(lead ^ 32);
  }
  return 1;
}
|
271
|
+
|
272
|
+
static BROTLI_NOINLINE int TransformDictionaryWord(
|
273
|
+
uint8_t* dst, const uint8_t* word, int len, int transform) {
|
274
|
+
int idx = 0;
|
275
|
+
{
|
276
|
+
const char* prefix = &kPrefixSuffix[kTransforms[transform].prefix_id];
|
277
|
+
while (*prefix) { dst[idx++] = (uint8_t)*prefix++; }
|
278
|
+
}
|
279
|
+
{
|
280
|
+
const int t = kTransforms[transform].transform;
|
281
|
+
int skip = t < kOmitFirst1 ? 0 : t - (kOmitFirst1 - 1);
|
282
|
+
int i = 0;
|
283
|
+
uint8_t* uppercase;
|
284
|
+
if (skip > len) {
|
285
|
+
skip = len;
|
286
|
+
}
|
287
|
+
word += skip;
|
288
|
+
len -= skip;
|
289
|
+
if (t <= kOmitLast9) {
|
290
|
+
len -= t;
|
291
|
+
}
|
292
|
+
while (i < len) { dst[idx++] = word[i++]; }
|
293
|
+
uppercase = &dst[idx - len];
|
294
|
+
if (t == kUppercaseFirst) {
|
295
|
+
ToUpperCase(uppercase);
|
296
|
+
} else if (t == kUppercaseAll) {
|
297
|
+
while (len > 0) {
|
298
|
+
int step = ToUpperCase(uppercase);
|
299
|
+
uppercase += step;
|
300
|
+
len -= step;
|
301
|
+
}
|
302
|
+
}
|
303
|
+
}
|
304
|
+
{
|
305
|
+
const char* suffix = &kPrefixSuffix[kTransforms[transform].suffix_id];
|
306
|
+
while (*suffix) { dst[idx++] = (uint8_t)*suffix++; }
|
307
|
+
return idx;
|
308
|
+
}
|
309
|
+
}
|
310
|
+
|
311
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
312
|
+
} /* extern "C" */
|
313
|
+
#endif
|
314
|
+
|
315
|
+
#endif /* BROTLI_DEC_TRANSFORM_H_ */
|
@@ -0,0 +1,36 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
*/
|
15
|
+
|
16
|
+
/* Common types */
|
17
|
+
|
18
|
+
#ifndef BROTLI_DEC_TYPES_H_
|
19
|
+
#define BROTLI_DEC_TYPES_H_
|
20
|
+
|
21
|
+
#include <stddef.h> /* for size_t */
|
22
|
+
|
23
|
+
/* Fixed-width integer types.  Every toolchain except Microsoft compilers with
   _MSC_VER below 1600 takes them from <stdint.h>; for those older compilers
   (presumably lacking the header -- confirm before relying on this) the
   types are declared by hand. */
#if !defined(_MSC_VER) || (_MSC_VER >= 1600)
#include <stdint.h>
#else
typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef signed short int16_t;
typedef unsigned short uint16_t;
typedef signed int int32_t;
typedef unsigned int uint32_t;
typedef unsigned long long int uint64_t;
typedef long long int int64_t;
#endif  /* !defined(_MSC_VER) || (_MSC_VER >= 1600) */
|
35
|
+
|
36
|
+
#endif /* BROTLI_DEC_TYPES_H_ */
|
@@ -0,0 +1,769 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Function to find backward reference copies.
|
16
|
+
|
17
|
+
#include "./backward_references.h"
|
18
|
+
|
19
|
+
#include <algorithm>
|
20
|
+
#include <limits>
|
21
|
+
#include <vector>
|
22
|
+
|
23
|
+
#include "./command.h"
|
24
|
+
#include "./fast_log.h"
|
25
|
+
|
26
|
+
namespace brotli {
|
27
|
+
|
28
|
+
// Positive infinity.  Used in this file as the initial cost of a ZopfliNode,
// i.e. the cost of a position that has not been reached yet.
static const double kInfinity = std::numeric_limits<double>::infinity();
|
29
|
+
|
30
|
+
// Histogram based cost model for zopflification.
|
31
|
+
class ZopfliCostModel {
|
32
|
+
public:
|
33
|
+
void SetFromCommands(size_t num_bytes,
|
34
|
+
size_t position,
|
35
|
+
const uint8_t* ringbuffer,
|
36
|
+
size_t ringbuffer_mask,
|
37
|
+
const Command* commands,
|
38
|
+
int num_commands,
|
39
|
+
int last_insert_len) {
|
40
|
+
std::vector<int> histogram_literal(256, 0);
|
41
|
+
std::vector<int> histogram_cmd(kNumCommandPrefixes, 0);
|
42
|
+
std::vector<int> histogram_dist(kNumDistancePrefixes, 0);
|
43
|
+
|
44
|
+
size_t pos = position - last_insert_len;
|
45
|
+
for (int i = 0; i < num_commands; i++) {
|
46
|
+
int inslength = commands[i].insert_len_;
|
47
|
+
int copylength = commands[i].copy_len_;
|
48
|
+
int distcode = commands[i].dist_prefix_;
|
49
|
+
int cmdcode = commands[i].cmd_prefix_;
|
50
|
+
|
51
|
+
histogram_cmd[cmdcode]++;
|
52
|
+
if (cmdcode >= 128) histogram_dist[distcode]++;
|
53
|
+
|
54
|
+
for (int j = 0; j < inslength; j++) {
|
55
|
+
histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
|
56
|
+
}
|
57
|
+
|
58
|
+
pos += inslength + copylength;
|
59
|
+
}
|
60
|
+
|
61
|
+
std::vector<double> cost_literal;
|
62
|
+
Set(histogram_literal, &cost_literal);
|
63
|
+
Set(histogram_cmd, &cost_cmd_);
|
64
|
+
Set(histogram_dist, &cost_dist_);
|
65
|
+
|
66
|
+
min_cost_cmd_ = kInfinity;
|
67
|
+
for (int i = 0; i < kNumCommandPrefixes; ++i) {
|
68
|
+
min_cost_cmd_ = std::min(min_cost_cmd_, cost_cmd_[i]);
|
69
|
+
}
|
70
|
+
|
71
|
+
literal_costs_.resize(num_bytes + 1);
|
72
|
+
literal_costs_[0] = 0.0;
|
73
|
+
for (int i = 0; i < num_bytes; ++i) {
|
74
|
+
literal_costs_[i + 1] = literal_costs_[i] +
|
75
|
+
cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
|
76
|
+
}
|
77
|
+
}
|
78
|
+
|
79
|
+
void SetFromLiteralCosts(size_t num_bytes,
|
80
|
+
size_t position,
|
81
|
+
const float* literal_cost,
|
82
|
+
size_t literal_cost_mask) {
|
83
|
+
literal_costs_.resize(num_bytes + 1);
|
84
|
+
literal_costs_[0] = 0.0;
|
85
|
+
if (literal_cost) {
|
86
|
+
for (int i = 0; i < num_bytes; ++i) {
|
87
|
+
literal_costs_[i + 1] = literal_costs_[i] +
|
88
|
+
literal_cost[(position + i) & literal_cost_mask];
|
89
|
+
}
|
90
|
+
} else {
|
91
|
+
for (int i = 1; i <= num_bytes; ++i) {
|
92
|
+
literal_costs_[i] = i * 5.4;
|
93
|
+
}
|
94
|
+
}
|
95
|
+
cost_cmd_.resize(kNumCommandPrefixes);
|
96
|
+
cost_dist_.resize(kNumDistancePrefixes);
|
97
|
+
for (int i = 0; i < kNumCommandPrefixes; ++i) {
|
98
|
+
cost_cmd_[i] = FastLog2(11 + i);
|
99
|
+
}
|
100
|
+
for (int i = 0; i < kNumDistancePrefixes; ++i) {
|
101
|
+
cost_dist_[i] = FastLog2(20 + i);
|
102
|
+
}
|
103
|
+
min_cost_cmd_ = FastLog2(11);
|
104
|
+
}
|
105
|
+
|
106
|
+
double GetCommandCost(
|
107
|
+
int dist_code, int length_code, int insert_length) const {
|
108
|
+
int inscode = GetInsertLengthCode(insert_length);
|
109
|
+
int copycode = GetCopyLengthCode(length_code);
|
110
|
+
uint16_t cmdcode = CombineLengthCodes(inscode, copycode, dist_code);
|
111
|
+
uint64_t insnumextra = insextra[inscode];
|
112
|
+
uint64_t copynumextra = copyextra[copycode];
|
113
|
+
uint16_t dist_symbol;
|
114
|
+
uint32_t distextra;
|
115
|
+
PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
|
116
|
+
uint32_t distnumextra = distextra >> 24;
|
117
|
+
|
118
|
+
double result = insnumextra + copynumextra + distnumextra;
|
119
|
+
result += cost_cmd_[cmdcode];
|
120
|
+
if (cmdcode >= 128) result += cost_dist_[dist_symbol];
|
121
|
+
return result;
|
122
|
+
}
|
123
|
+
|
124
|
+
double GetLiteralCosts(int from, int to) const {
|
125
|
+
return literal_costs_[to] - literal_costs_[from];
|
126
|
+
}
|
127
|
+
|
128
|
+
double GetMinCostCmd() const {
|
129
|
+
return min_cost_cmd_;
|
130
|
+
}
|
131
|
+
|
132
|
+
private:
|
133
|
+
void Set(const std::vector<int>& histogram, std::vector<double>* cost) {
|
134
|
+
cost->resize(histogram.size());
|
135
|
+
int sum = 0;
|
136
|
+
for (size_t i = 0; i < histogram.size(); i++) {
|
137
|
+
sum += histogram[i];
|
138
|
+
}
|
139
|
+
double log2sum = FastLog2(sum);
|
140
|
+
for (size_t i = 0; i < histogram.size(); i++) {
|
141
|
+
if (histogram[i] == 0) {
|
142
|
+
(*cost)[i] = log2sum + 2;
|
143
|
+
continue;
|
144
|
+
}
|
145
|
+
|
146
|
+
// Shannon bits for this symbol.
|
147
|
+
(*cost)[i] = log2sum - FastLog2(histogram[i]);
|
148
|
+
|
149
|
+
// Cannot be coded with less than 1 bit
|
150
|
+
if ((*cost)[i] < 1) (*cost)[i] = 1;
|
151
|
+
}
|
152
|
+
}
|
153
|
+
|
154
|
+
std::vector<double> cost_cmd_; // The insert and copy length symbols.
|
155
|
+
std::vector<double> cost_dist_;
|
156
|
+
// Cumulative costs of literals per position in the stream.
|
157
|
+
std::vector<double> literal_costs_;
|
158
|
+
double min_cost_cmd_;
|
159
|
+
};
|
160
|
+
|
161
|
+
// Fills result_dist_cache (4 entries) with the distance cache that applies
// after a copy using the given distance.
//
// When the distance is within max_distance and was coded with a non-zero
// distance code, it becomes the newest entry and the first three entries of
// dist_cache shift down by one.  In every other case the cache is copied
// through unchanged.  The two arrays must not overlap.
inline void SetDistanceCache(int distance,
                             int distance_code,
                             int max_distance,
                             const int* dist_cache,
                             int* result_dist_cache) {
  const bool push_front = (distance_code > 0) && (distance <= max_distance);
  int* out = result_dist_cache;
  int carried = 4;
  if (push_front) {
    *out++ = distance;
    carried = 3;
  }
  memcpy(out, dist_cache, carried * sizeof(dist_cache[0]));
}
|
173
|
+
|
174
|
+
inline int ComputeDistanceCode(int distance,
|
175
|
+
int max_distance,
|
176
|
+
int quality,
|
177
|
+
const int* dist_cache) {
|
178
|
+
if (distance <= max_distance) {
|
179
|
+
if (distance == dist_cache[0]) {
|
180
|
+
return 0;
|
181
|
+
} else if (distance == dist_cache[1]) {
|
182
|
+
return 1;
|
183
|
+
} else if (distance == dist_cache[2]) {
|
184
|
+
return 2;
|
185
|
+
} else if (distance == dist_cache[3]) {
|
186
|
+
return 3;
|
187
|
+
} else if (quality > 3 && distance >= 6) {
|
188
|
+
for (int k = 4; k < kNumDistanceShortCodes; ++k) {
|
189
|
+
int idx = kDistanceCacheIndex[k];
|
190
|
+
int candidate = dist_cache[idx] + kDistanceCacheOffset[k];
|
191
|
+
static const int kLimits[16] = { 0, 0, 0, 0,
|
192
|
+
6, 6, 11, 11,
|
193
|
+
11, 11, 11, 11,
|
194
|
+
12, 12, 12, 12 };
|
195
|
+
if (distance == candidate && distance >= kLimits[k]) {
|
196
|
+
return k;
|
197
|
+
}
|
198
|
+
}
|
199
|
+
}
|
200
|
+
}
|
201
|
+
return distance + 15;
|
202
|
+
}
|
203
|
+
|
204
|
+
struct ZopfliNode {
|
205
|
+
ZopfliNode() : length(1),
|
206
|
+
distance(0),
|
207
|
+
distance_code(0),
|
208
|
+
length_code(0),
|
209
|
+
insert_length(0),
|
210
|
+
cost(kInfinity) {}
|
211
|
+
|
212
|
+
// best length to get up to this byte (not including this byte itself)
|
213
|
+
int length;
|
214
|
+
// distance associated with the length
|
215
|
+
int distance;
|
216
|
+
int distance_code;
|
217
|
+
int distance_cache[4];
|
218
|
+
// length code associated with the length - usually the same as length,
|
219
|
+
// except in case of length-changing dictionary transformation.
|
220
|
+
int length_code;
|
221
|
+
// number of literal inserts before this copy
|
222
|
+
int insert_length;
|
223
|
+
// smallest cost to get to this byte from the beginning, as found so far
|
224
|
+
double cost;
|
225
|
+
};
|
226
|
+
|
227
|
+
// Records a command in the node at index pos + len.
//
// The command inserts the literals between start_pos and pos and then copies
// len bytes from backward distance dist.  The node receives the copy
// parameters, the insert length (pos - start_pos), the given cost, and the
// distance cache that results from applying this copy to dist_cache.  The
// caller decides whether the new cost is an improvement; this function
// overwrites unconditionally.
inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
                             int len, int len_code, int dist, int dist_code,
                             int max_dist, const int* dist_cache,
                             double cost) {
  ZopfliNode* const target = &nodes[pos + len];
  target->cost = cost;
  target->length = len;
  target->length_code = len_code;
  target->distance = dist;
  target->distance_code = dist_code;
  target->insert_length = pos - start_pos;
  SetDistanceCache(dist, dist_code, max_dist, dist_cache,
                   target->distance_cache);
}
|
241
|
+
|
242
|
+
// Maintains the smallest 2^k cost difference together with their positions
|
243
|
+
class StartPosQueue {
|
244
|
+
public:
|
245
|
+
explicit StartPosQueue(int bits)
|
246
|
+
: mask_((1 << bits) - 1), q_(1 << bits), idx_(0) {}
|
247
|
+
|
248
|
+
void Clear() {
|
249
|
+
idx_ = 0;
|
250
|
+
}
|
251
|
+
|
252
|
+
void Push(size_t pos, double costdiff) {
|
253
|
+
if (costdiff == kInfinity) {
|
254
|
+
// We can't start a command from an unreachable start position.
|
255
|
+
// E.g. position 1 in a stream is always unreachable, because all commands
|
256
|
+
// have a copy of at least length 2.
|
257
|
+
return;
|
258
|
+
}
|
259
|
+
q_[idx_ & mask_] = std::make_pair(pos, costdiff);
|
260
|
+
// Restore the sorted order.
|
261
|
+
for (int i = idx_; i > 0 && i > idx_ - mask_; --i) {
|
262
|
+
if (q_[i & mask_].second > q_[(i - 1) & mask_].second) {
|
263
|
+
std::swap(q_[i & mask_], q_[(i - 1) & mask_]);
|
264
|
+
}
|
265
|
+
}
|
266
|
+
++idx_;
|
267
|
+
}
|
268
|
+
|
269
|
+
int size() const { return std::min<int>(idx_, mask_ + 1); }
|
270
|
+
|
271
|
+
size_t GetStartPos(int k) const {
|
272
|
+
return q_[(idx_ - k - 1) & mask_].first;
|
273
|
+
}
|
274
|
+
|
275
|
+
private:
|
276
|
+
const int mask_;
|
277
|
+
std::vector<std::pair<size_t, double> > q_;
|
278
|
+
int idx_;
|
279
|
+
};
|
280
|
+
|
281
|
+
// Returns the minimum possible copy length that can improve the cost of any
|
282
|
+
// future position.
|
283
|
+
int ComputeMinimumCopyLength(const StartPosQueue& queue,
|
284
|
+
const std::vector<ZopfliNode>& nodes,
|
285
|
+
const ZopfliCostModel& model,
|
286
|
+
size_t pos,
|
287
|
+
double min_cost_cmd) {
|
288
|
+
// Compute the minimum possible cost of reaching any future position.
|
289
|
+
const size_t start0 = queue.GetStartPos(0);
|
290
|
+
double min_cost = (nodes[start0].cost +
|
291
|
+
model.GetLiteralCosts(start0, pos) +
|
292
|
+
min_cost_cmd);
|
293
|
+
int len = 2;
|
294
|
+
int next_len_bucket = 4;
|
295
|
+
int next_len_offset = 10;
|
296
|
+
while (pos + len < nodes.size() && nodes[pos + len].cost <= min_cost) {
|
297
|
+
// We already reached (pos + len) with no more cost than the minimum
|
298
|
+
// possible cost of reaching anything from this pos, so there is no point in
|
299
|
+
// looking for lengths <= len.
|
300
|
+
++len;
|
301
|
+
if (len == next_len_offset) {
|
302
|
+
// We reached the next copy length code bucket, so we add one more
|
303
|
+
// extra bit to the minimum cost.
|
304
|
+
min_cost += 1.0;
|
305
|
+
next_len_offset += next_len_bucket;
|
306
|
+
next_len_bucket *= 2;
|
307
|
+
}
|
308
|
+
}
|
309
|
+
return len;
|
310
|
+
}
|
311
|
+
|
312
|
+
void ZopfliIterate(size_t num_bytes,
|
313
|
+
size_t position,
|
314
|
+
const uint8_t* ringbuffer,
|
315
|
+
size_t ringbuffer_mask,
|
316
|
+
const size_t max_backward_limit,
|
317
|
+
const ZopfliCostModel& model,
|
318
|
+
const std::vector<int>& num_matches,
|
319
|
+
const std::vector<BackwardMatch>& matches,
|
320
|
+
int* dist_cache,
|
321
|
+
int* last_insert_len,
|
322
|
+
Command* commands,
|
323
|
+
int* num_commands,
|
324
|
+
int* num_literals) {
|
325
|
+
const Command * const orig_commands = commands;
|
326
|
+
|
327
|
+
std::vector<ZopfliNode> nodes(num_bytes + 1);
|
328
|
+
nodes[0].length = 0;
|
329
|
+
nodes[0].cost = 0;
|
330
|
+
memcpy(nodes[0].distance_cache, dist_cache, 4 * sizeof(dist_cache[0]));
|
331
|
+
|
332
|
+
StartPosQueue queue(3);
|
333
|
+
const double min_cost_cmd = model.GetMinCostCmd();
|
334
|
+
|
335
|
+
size_t cur_match_pos = 0;
|
336
|
+
for (size_t i = 0; i + 3 < num_bytes; i++) {
|
337
|
+
size_t cur_ix = position + i;
|
338
|
+
size_t cur_ix_masked = cur_ix & ringbuffer_mask;
|
339
|
+
size_t max_distance = std::min(cur_ix, max_backward_limit);
|
340
|
+
int max_length = num_bytes - i;
|
341
|
+
|
342
|
+
queue.Push(i, nodes[i].cost - model.GetLiteralCosts(0, i));
|
343
|
+
|
344
|
+
const int min_len = ComputeMinimumCopyLength(queue, nodes, model,
|
345
|
+
i, min_cost_cmd);
|
346
|
+
|
347
|
+
// Go over the command starting positions in order of increasing cost
|
348
|
+
// difference.
|
349
|
+
for (size_t k = 0; k < 5 && k < queue.size(); ++k) {
|
350
|
+
const size_t start = queue.GetStartPos(k);
|
351
|
+
const double start_costdiff =
|
352
|
+
nodes[start].cost - model.GetLiteralCosts(0, start);
|
353
|
+
const int* dist_cache2 = &nodes[start].distance_cache[0];
|
354
|
+
|
355
|
+
// Look for last distance matches using the distance cache from this
|
356
|
+
// starting position.
|
357
|
+
int best_len = min_len - 1;
|
358
|
+
for (int j = 0; j < kNumDistanceShortCodes; ++j) {
|
359
|
+
const int idx = kDistanceCacheIndex[j];
|
360
|
+
const int backward = dist_cache2[idx] + kDistanceCacheOffset[j];
|
361
|
+
size_t prev_ix = cur_ix - backward;
|
362
|
+
if (prev_ix >= cur_ix) {
|
363
|
+
continue;
|
364
|
+
}
|
365
|
+
if (PREDICT_FALSE(backward > max_distance)) {
|
366
|
+
continue;
|
367
|
+
}
|
368
|
+
prev_ix &= ringbuffer_mask;
|
369
|
+
|
370
|
+
if (cur_ix_masked + best_len > ringbuffer_mask ||
|
371
|
+
prev_ix + best_len > ringbuffer_mask ||
|
372
|
+
ringbuffer[cur_ix_masked + best_len] !=
|
373
|
+
ringbuffer[prev_ix + best_len]) {
|
374
|
+
continue;
|
375
|
+
}
|
376
|
+
const size_t len =
|
377
|
+
FindMatchLengthWithLimit(&ringbuffer[prev_ix],
|
378
|
+
&ringbuffer[cur_ix_masked],
|
379
|
+
max_length);
|
380
|
+
for (int l = best_len + 1; l <= len; ++l) {
|
381
|
+
double cmd_cost = model.GetCommandCost(j, l, i - start);
|
382
|
+
double cost = start_costdiff + cmd_cost + model.GetLiteralCosts(0, i);
|
383
|
+
if (cost < nodes[i + l].cost) {
|
384
|
+
UpdateZopfliNode(&nodes[0], i, start, l, l, backward, j,
|
385
|
+
max_distance, dist_cache2, cost);
|
386
|
+
}
|
387
|
+
best_len = l;
|
388
|
+
}
|
389
|
+
}
|
390
|
+
|
391
|
+
// At higher iterations look only for new last distance matches, since
|
392
|
+
// looking only for new command start positions with the same distances
|
393
|
+
// does not help much.
|
394
|
+
if (k >= 2) continue;
|
395
|
+
|
396
|
+
// Loop through all possible copy lengths at this position.
|
397
|
+
int len = min_len;
|
398
|
+
for (int j = 0; j < num_matches[i]; ++j) {
|
399
|
+
BackwardMatch match = matches[cur_match_pos + j];
|
400
|
+
int dist = match.distance;
|
401
|
+
bool is_dictionary_match = dist > max_distance;
|
402
|
+
// We already tried all possible last distance matches, so we can use
|
403
|
+
// normal distance code here.
|
404
|
+
int dist_code = dist + 15;
|
405
|
+
// Try all copy lengths up until the maximum copy length corresponding
|
406
|
+
// to this distance. If the distance refers to the static dictionary, or
|
407
|
+
// the maximum length is long enough, try only one maximum length.
|
408
|
+
int max_len = match.length();
|
409
|
+
if (len < max_len && (is_dictionary_match || max_len > kMaxZopfliLen)) {
|
410
|
+
len = max_len;
|
411
|
+
}
|
412
|
+
for (; len <= max_len; ++len) {
|
413
|
+
int len_code = is_dictionary_match ? match.length_code() : len;
|
414
|
+
double cmd_cost =
|
415
|
+
model.GetCommandCost(dist_code, len_code, i - start);
|
416
|
+
double cost = start_costdiff + cmd_cost + model.GetLiteralCosts(0, i);
|
417
|
+
if (cost < nodes[i + len].cost) {
|
418
|
+
UpdateZopfliNode(&nodes[0], i, start, len, len_code, dist,
|
419
|
+
dist_code, max_distance, dist_cache2, cost);
|
420
|
+
}
|
421
|
+
}
|
422
|
+
}
|
423
|
+
}
|
424
|
+
|
425
|
+
cur_match_pos += num_matches[i];
|
426
|
+
|
427
|
+
// The zopflification can be too slow in case of very long lengths, so in
|
428
|
+
// such case skip it all, it does not cost a lot of compression ratio.
|
429
|
+
if (num_matches[i] == 1 &&
|
430
|
+
matches[cur_match_pos - 1].length() > kMaxZopfliLen) {
|
431
|
+
i += matches[cur_match_pos - 1].length() - 1;
|
432
|
+
queue.Clear();
|
433
|
+
}
|
434
|
+
}
|
435
|
+
|
436
|
+
std::vector<int> backwards;
|
437
|
+
size_t index = num_bytes;
|
438
|
+
while (nodes[index].cost == kInfinity) --index;
|
439
|
+
while (index > 0) {
|
440
|
+
int len = nodes[index].length + nodes[index].insert_length;
|
441
|
+
backwards.push_back(len);
|
442
|
+
index -= len;
|
443
|
+
}
|
444
|
+
|
445
|
+
std::vector<int> path;
|
446
|
+
for (size_t i = backwards.size(); i > 0; i--) {
|
447
|
+
path.push_back(backwards[i - 1]);
|
448
|
+
}
|
449
|
+
|
450
|
+
size_t pos = 0;
|
451
|
+
for (size_t i = 0; i < path.size(); i++) {
|
452
|
+
const ZopfliNode& next = nodes[pos + path[i]];
|
453
|
+
int copy_length = next.length;
|
454
|
+
int insert_length = next.insert_length;
|
455
|
+
pos += insert_length;
|
456
|
+
if (i == 0) {
|
457
|
+
insert_length += *last_insert_len;
|
458
|
+
*last_insert_len = 0;
|
459
|
+
}
|
460
|
+
int distance = next.distance;
|
461
|
+
int len_code = next.length_code;
|
462
|
+
size_t max_distance = std::min(position + pos, max_backward_limit);
|
463
|
+
bool is_dictionary = (distance > max_distance);
|
464
|
+
int dist_code = next.distance_code;
|
465
|
+
|
466
|
+
Command cmd(insert_length, copy_length, len_code, dist_code);
|
467
|
+
*commands++ = cmd;
|
468
|
+
|
469
|
+
if (!is_dictionary && dist_code > 0) {
|
470
|
+
dist_cache[3] = dist_cache[2];
|
471
|
+
dist_cache[2] = dist_cache[1];
|
472
|
+
dist_cache[1] = dist_cache[0];
|
473
|
+
dist_cache[0] = distance;
|
474
|
+
}
|
475
|
+
|
476
|
+
*num_literals += insert_length;
|
477
|
+
insert_length = 0;
|
478
|
+
pos += copy_length;
|
479
|
+
}
|
480
|
+
*last_insert_len += num_bytes - pos;
|
481
|
+
*num_commands += (commands - orig_commands);
|
482
|
+
}
|
483
|
+
|
484
|
+
template<typename Hasher>
|
485
|
+
void CreateBackwardReferences(size_t num_bytes,
|
486
|
+
size_t position,
|
487
|
+
const uint8_t* ringbuffer,
|
488
|
+
size_t ringbuffer_mask,
|
489
|
+
const size_t max_backward_limit,
|
490
|
+
const int quality,
|
491
|
+
Hasher* hasher,
|
492
|
+
int* dist_cache,
|
493
|
+
int* last_insert_len,
|
494
|
+
Command* commands,
|
495
|
+
int* num_commands,
|
496
|
+
int* num_literals) {
|
497
|
+
if (num_bytes >= 3 && position >= 3) {
|
498
|
+
// Prepare the hashes for three last bytes of the last write.
|
499
|
+
// These could not be calculated before, since they require knowledge
|
500
|
+
// of both the previous and the current block.
|
501
|
+
hasher->Store(&ringbuffer[(position - 3) & ringbuffer_mask],
|
502
|
+
position - 3);
|
503
|
+
hasher->Store(&ringbuffer[(position - 2) & ringbuffer_mask],
|
504
|
+
position - 2);
|
505
|
+
hasher->Store(&ringbuffer[(position - 1) & ringbuffer_mask],
|
506
|
+
position - 1);
|
507
|
+
}
|
508
|
+
const Command * const orig_commands = commands;
|
509
|
+
int insert_length = *last_insert_len;
|
510
|
+
size_t i = position & ringbuffer_mask;
|
511
|
+
const int i_diff = position - i;
|
512
|
+
const size_t i_end = i + num_bytes;
|
513
|
+
|
514
|
+
// For speed up heuristics for random data.
|
515
|
+
const int random_heuristics_window_size = quality < 9 ? 64 : 512;
|
516
|
+
int apply_random_heuristics = i + random_heuristics_window_size;
|
517
|
+
|
518
|
+
// Minimum score to accept a backward reference.
|
519
|
+
const int kMinScore = 4.0;
|
520
|
+
|
521
|
+
while (i + Hasher::kHashTypeLength - 1 < i_end) {
|
522
|
+
int max_length = i_end - i;
|
523
|
+
size_t max_distance = std::min(i + i_diff, max_backward_limit);
|
524
|
+
int best_len = 0;
|
525
|
+
int best_len_code = 0;
|
526
|
+
int best_dist = 0;
|
527
|
+
double best_score = kMinScore;
|
528
|
+
bool match_found = hasher->FindLongestMatch(
|
529
|
+
ringbuffer, ringbuffer_mask,
|
530
|
+
dist_cache, i + i_diff, max_length, max_distance,
|
531
|
+
&best_len, &best_len_code, &best_dist, &best_score);
|
532
|
+
if (match_found) {
|
533
|
+
// Found a match. Let's look for something even better ahead.
|
534
|
+
int delayed_backward_references_in_row = 0;
|
535
|
+
for (;;) {
|
536
|
+
--max_length;
|
537
|
+
int best_len_2 = quality < 5 ? std::min(best_len - 1, max_length) : 0;
|
538
|
+
int best_len_code_2 = 0;
|
539
|
+
int best_dist_2 = 0;
|
540
|
+
double best_score_2 = kMinScore;
|
541
|
+
max_distance = std::min(i + i_diff + 1, max_backward_limit);
|
542
|
+
hasher->Store(ringbuffer + i, i + i_diff);
|
543
|
+
match_found = hasher->FindLongestMatch(
|
544
|
+
ringbuffer, ringbuffer_mask,
|
545
|
+
dist_cache, i + i_diff + 1, max_length, max_distance,
|
546
|
+
&best_len_2, &best_len_code_2, &best_dist_2, &best_score_2);
|
547
|
+
double cost_diff_lazy = 7.0;
|
548
|
+
if (match_found && best_score_2 >= best_score + cost_diff_lazy) {
|
549
|
+
// Ok, let's just write one byte for now and start a match from the
|
550
|
+
// next byte.
|
551
|
+
++i;
|
552
|
+
++insert_length;
|
553
|
+
best_len = best_len_2;
|
554
|
+
best_len_code = best_len_code_2;
|
555
|
+
best_dist = best_dist_2;
|
556
|
+
best_score = best_score_2;
|
557
|
+
if (++delayed_backward_references_in_row < 4) {
|
558
|
+
continue;
|
559
|
+
}
|
560
|
+
}
|
561
|
+
break;
|
562
|
+
}
|
563
|
+
apply_random_heuristics =
|
564
|
+
i + 2 * best_len + random_heuristics_window_size;
|
565
|
+
max_distance = std::min(i + i_diff, max_backward_limit);
|
566
|
+
// The first 16 codes are special shortcodes, and the minimum offset is 1.
|
567
|
+
int distance_code =
|
568
|
+
ComputeDistanceCode(best_dist, max_distance, quality, dist_cache);
|
569
|
+
if (best_dist <= max_distance && distance_code > 0) {
|
570
|
+
dist_cache[3] = dist_cache[2];
|
571
|
+
dist_cache[2] = dist_cache[1];
|
572
|
+
dist_cache[1] = dist_cache[0];
|
573
|
+
dist_cache[0] = best_dist;
|
574
|
+
}
|
575
|
+
Command cmd(insert_length, best_len, best_len_code, distance_code);
|
576
|
+
*commands++ = cmd;
|
577
|
+
*num_literals += insert_length;
|
578
|
+
insert_length = 0;
|
579
|
+
// Put the hash keys into the table, if there are enough
|
580
|
+
// bytes left.
|
581
|
+
for (int j = 1; j < best_len; ++j) {
|
582
|
+
hasher->Store(&ringbuffer[i + j], i + i_diff + j);
|
583
|
+
}
|
584
|
+
i += best_len;
|
585
|
+
} else {
|
586
|
+
++insert_length;
|
587
|
+
hasher->Store(ringbuffer + i, i + i_diff);
|
588
|
+
++i;
|
589
|
+
// If we have not seen matches for a long time, we can skip some
|
590
|
+
// match lookups. Unsuccessful match lookups are very very expensive
|
591
|
+
// and this kind of a heuristic speeds up compression quite
|
592
|
+
// a lot.
|
593
|
+
if (i > apply_random_heuristics) {
|
594
|
+
// Going through uncompressible data, jump.
|
595
|
+
if (i > apply_random_heuristics + 4 * random_heuristics_window_size) {
|
596
|
+
// It is quite a long time since we saw a copy, so we assume
|
597
|
+
// that this data is not compressible, and store hashes less
|
598
|
+
// often. Hashes of non compressible data are less likely to
|
599
|
+
// turn out to be useful in the future, too, so we store less of
|
600
|
+
// them to not to flood out the hash table of good compressible
|
601
|
+
// data.
|
602
|
+
int i_jump = std::min(i + 16, i_end - 4);
|
603
|
+
for (; i < i_jump; i += 4) {
|
604
|
+
hasher->Store(ringbuffer + i, i + i_diff);
|
605
|
+
insert_length += 4;
|
606
|
+
}
|
607
|
+
} else {
|
608
|
+
int i_jump = std::min(i + 8, i_end - 3);
|
609
|
+
for (; i < i_jump; i += 2) {
|
610
|
+
hasher->Store(ringbuffer + i, i + i_diff);
|
611
|
+
insert_length += 2;
|
612
|
+
}
|
613
|
+
}
|
614
|
+
}
|
615
|
+
}
|
616
|
+
}
|
617
|
+
insert_length += (i_end - i);
|
618
|
+
*last_insert_len = insert_length;
|
619
|
+
*num_commands += (commands - orig_commands);
|
620
|
+
}
|
621
|
+
|
622
|
+
void CreateBackwardReferences(size_t num_bytes,
|
623
|
+
size_t position,
|
624
|
+
const uint8_t* ringbuffer,
|
625
|
+
size_t ringbuffer_mask,
|
626
|
+
const float* literal_cost,
|
627
|
+
size_t literal_cost_mask,
|
628
|
+
const size_t max_backward_limit,
|
629
|
+
const int quality,
|
630
|
+
Hashers* hashers,
|
631
|
+
int hash_type,
|
632
|
+
int* dist_cache,
|
633
|
+
int* last_insert_len,
|
634
|
+
Command* commands,
|
635
|
+
int* num_commands,
|
636
|
+
int* num_literals) {
|
637
|
+
bool zopflify = quality > 9;
|
638
|
+
if (zopflify) {
|
639
|
+
Hashers::H9* hasher = hashers->hash_h9.get();
|
640
|
+
if (num_bytes >= 3 && position >= 3) {
|
641
|
+
// Prepare the hashes for three last bytes of the last write.
|
642
|
+
// These could not be calculated before, since they require knowledge
|
643
|
+
// of both the previous and the current block.
|
644
|
+
hasher->Store(&ringbuffer[(position - 3) & ringbuffer_mask],
|
645
|
+
position - 3);
|
646
|
+
hasher->Store(&ringbuffer[(position - 2) & ringbuffer_mask],
|
647
|
+
position - 2);
|
648
|
+
hasher->Store(&ringbuffer[(position - 1) & ringbuffer_mask],
|
649
|
+
position - 1);
|
650
|
+
}
|
651
|
+
std::vector<int> num_matches(num_bytes);
|
652
|
+
std::vector<BackwardMatch> matches(3 * num_bytes);
|
653
|
+
size_t cur_match_pos = 0;
|
654
|
+
for (size_t i = 0; i + 3 < num_bytes; ++i) {
|
655
|
+
size_t max_distance = std::min(position + i, max_backward_limit);
|
656
|
+
int max_length = num_bytes - i;
|
657
|
+
// Ensure that we have at least kMaxZopfliLen free slots.
|
658
|
+
if (matches.size() < cur_match_pos + kMaxZopfliLen) {
|
659
|
+
matches.resize(cur_match_pos + kMaxZopfliLen);
|
660
|
+
}
|
661
|
+
hasher->FindAllMatches(
|
662
|
+
ringbuffer, ringbuffer_mask,
|
663
|
+
position + i, max_length, max_distance,
|
664
|
+
&num_matches[i], &matches[cur_match_pos]);
|
665
|
+
hasher->Store(&ringbuffer[(position + i) & ringbuffer_mask],
|
666
|
+
position + i);
|
667
|
+
cur_match_pos += num_matches[i];
|
668
|
+
if (num_matches[i] == 1) {
|
669
|
+
const int match_len = matches[cur_match_pos - 1].length();
|
670
|
+
if (match_len > kMaxZopfliLen) {
|
671
|
+
for (int j = 1; j < match_len; ++j) {
|
672
|
+
++i;
|
673
|
+
hasher->Store(
|
674
|
+
&ringbuffer[(position + i) & ringbuffer_mask], position + i);
|
675
|
+
num_matches[i] = 0;
|
676
|
+
}
|
677
|
+
}
|
678
|
+
}
|
679
|
+
}
|
680
|
+
int orig_num_literals = *num_literals;
|
681
|
+
int orig_last_insert_len = *last_insert_len;
|
682
|
+
int orig_dist_cache[4] = {
|
683
|
+
dist_cache[0], dist_cache[1], dist_cache[2], dist_cache[3]
|
684
|
+
};
|
685
|
+
int orig_num_commands = *num_commands;
|
686
|
+
static const int kIterations = 2;
|
687
|
+
for (int i = 0; i < kIterations; i++) {
|
688
|
+
ZopfliCostModel model;
|
689
|
+
if (i == 0) {
|
690
|
+
model.SetFromLiteralCosts(num_bytes, position,
|
691
|
+
literal_cost, literal_cost_mask);
|
692
|
+
} else {
|
693
|
+
model.SetFromCommands(num_bytes, position,
|
694
|
+
ringbuffer, ringbuffer_mask,
|
695
|
+
commands, *num_commands - orig_num_commands,
|
696
|
+
orig_last_insert_len);
|
697
|
+
}
|
698
|
+
*num_commands = orig_num_commands;
|
699
|
+
*num_literals = orig_num_literals;
|
700
|
+
*last_insert_len = orig_last_insert_len;
|
701
|
+
memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
|
702
|
+
ZopfliIterate(num_bytes, position, ringbuffer, ringbuffer_mask,
|
703
|
+
max_backward_limit, model, num_matches, matches, dist_cache,
|
704
|
+
last_insert_len, commands, num_commands, num_literals);
|
705
|
+
}
|
706
|
+
return;
|
707
|
+
}
|
708
|
+
|
709
|
+
switch (hash_type) {
|
710
|
+
case 1:
|
711
|
+
CreateBackwardReferences<Hashers::H1>(
|
712
|
+
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
|
713
|
+
quality, hashers->hash_h1.get(), dist_cache, last_insert_len,
|
714
|
+
commands, num_commands, num_literals);
|
715
|
+
break;
|
716
|
+
case 2:
|
717
|
+
CreateBackwardReferences<Hashers::H2>(
|
718
|
+
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
|
719
|
+
quality, hashers->hash_h2.get(), dist_cache, last_insert_len,
|
720
|
+
commands, num_commands, num_literals);
|
721
|
+
break;
|
722
|
+
case 3:
|
723
|
+
CreateBackwardReferences<Hashers::H3>(
|
724
|
+
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
|
725
|
+
quality, hashers->hash_h3.get(), dist_cache, last_insert_len,
|
726
|
+
commands, num_commands, num_literals);
|
727
|
+
break;
|
728
|
+
case 4:
|
729
|
+
CreateBackwardReferences<Hashers::H4>(
|
730
|
+
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
|
731
|
+
quality, hashers->hash_h4.get(), dist_cache, last_insert_len,
|
732
|
+
commands, num_commands, num_literals);
|
733
|
+
break;
|
734
|
+
case 5:
|
735
|
+
CreateBackwardReferences<Hashers::H5>(
|
736
|
+
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
|
737
|
+
quality, hashers->hash_h5.get(), dist_cache, last_insert_len,
|
738
|
+
commands, num_commands, num_literals);
|
739
|
+
break;
|
740
|
+
case 6:
|
741
|
+
CreateBackwardReferences<Hashers::H6>(
|
742
|
+
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
|
743
|
+
quality, hashers->hash_h6.get(), dist_cache, last_insert_len,
|
744
|
+
commands, num_commands, num_literals);
|
745
|
+
break;
|
746
|
+
case 7:
|
747
|
+
CreateBackwardReferences<Hashers::H7>(
|
748
|
+
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
|
749
|
+
quality, hashers->hash_h7.get(), dist_cache, last_insert_len,
|
750
|
+
commands, num_commands, num_literals);
|
751
|
+
break;
|
752
|
+
case 8:
|
753
|
+
CreateBackwardReferences<Hashers::H8>(
|
754
|
+
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
|
755
|
+
quality, hashers->hash_h8.get(), dist_cache, last_insert_len,
|
756
|
+
commands, num_commands, num_literals);
|
757
|
+
break;
|
758
|
+
case 9:
|
759
|
+
CreateBackwardReferences<Hashers::H9>(
|
760
|
+
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
|
761
|
+
quality, hashers->hash_h9.get(), dist_cache, last_insert_len,
|
762
|
+
commands, num_commands, num_literals);
|
763
|
+
break;
|
764
|
+
default:
|
765
|
+
break;
|
766
|
+
}
|
767
|
+
}
|
768
|
+
|
769
|
+
} // namespace brotli
|