extbrotli 0.0.1.PROTOTYPE
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +28 -0
- data/README.md +67 -0
- data/Rakefile +158 -0
- data/contrib/brotli/LICENSE +202 -0
- data/contrib/brotli/README.md +18 -0
- data/contrib/brotli/dec/bit_reader.c +55 -0
- data/contrib/brotli/dec/bit_reader.h +256 -0
- data/contrib/brotli/dec/context.h +260 -0
- data/contrib/brotli/dec/decode.c +1573 -0
- data/contrib/brotli/dec/decode.h +160 -0
- data/contrib/brotli/dec/dictionary.h +9494 -0
- data/contrib/brotli/dec/huffman.c +325 -0
- data/contrib/brotli/dec/huffman.h +77 -0
- data/contrib/brotli/dec/port.h +148 -0
- data/contrib/brotli/dec/prefix.h +756 -0
- data/contrib/brotli/dec/state.c +149 -0
- data/contrib/brotli/dec/state.h +185 -0
- data/contrib/brotli/dec/streams.c +99 -0
- data/contrib/brotli/dec/streams.h +100 -0
- data/contrib/brotli/dec/transform.h +315 -0
- data/contrib/brotli/dec/types.h +36 -0
- data/contrib/brotli/enc/backward_references.cc +769 -0
- data/contrib/brotli/enc/backward_references.h +50 -0
- data/contrib/brotli/enc/bit_cost.h +147 -0
- data/contrib/brotli/enc/block_splitter.cc +418 -0
- data/contrib/brotli/enc/block_splitter.h +78 -0
- data/contrib/brotli/enc/brotli_bit_stream.cc +884 -0
- data/contrib/brotli/enc/brotli_bit_stream.h +149 -0
- data/contrib/brotli/enc/cluster.h +290 -0
- data/contrib/brotli/enc/command.h +140 -0
- data/contrib/brotli/enc/context.h +185 -0
- data/contrib/brotli/enc/dictionary.h +9485 -0
- data/contrib/brotli/enc/dictionary_hash.h +4125 -0
- data/contrib/brotli/enc/encode.cc +715 -0
- data/contrib/brotli/enc/encode.h +196 -0
- data/contrib/brotli/enc/encode_parallel.cc +354 -0
- data/contrib/brotli/enc/encode_parallel.h +37 -0
- data/contrib/brotli/enc/entropy_encode.cc +492 -0
- data/contrib/brotli/enc/entropy_encode.h +88 -0
- data/contrib/brotli/enc/fast_log.h +179 -0
- data/contrib/brotli/enc/find_match_length.h +87 -0
- data/contrib/brotli/enc/hash.h +686 -0
- data/contrib/brotli/enc/histogram.cc +76 -0
- data/contrib/brotli/enc/histogram.h +100 -0
- data/contrib/brotli/enc/literal_cost.cc +172 -0
- data/contrib/brotli/enc/literal_cost.h +38 -0
- data/contrib/brotli/enc/metablock.cc +544 -0
- data/contrib/brotli/enc/metablock.h +88 -0
- data/contrib/brotli/enc/port.h +151 -0
- data/contrib/brotli/enc/prefix.h +85 -0
- data/contrib/brotli/enc/ringbuffer.h +108 -0
- data/contrib/brotli/enc/static_dict.cc +441 -0
- data/contrib/brotli/enc/static_dict.h +40 -0
- data/contrib/brotli/enc/static_dict_lut.h +12063 -0
- data/contrib/brotli/enc/streams.cc +127 -0
- data/contrib/brotli/enc/streams.h +129 -0
- data/contrib/brotli/enc/transform.h +250 -0
- data/contrib/brotli/enc/write_bits.h +91 -0
- data/ext/extbrotli.cc +24 -0
- data/ext/extbrotli.h +73 -0
- data/ext/extconf.rb +35 -0
- data/ext/lldecoder.c +220 -0
- data/ext/llencoder.cc +433 -0
- data/gemstub.rb +21 -0
- data/lib/extbrotli.rb +243 -0
- data/lib/extbrotli/version.rb +3 -0
- metadata +140 -0
@@ -0,0 +1,18 @@
|
|
1
|
+
brotli
|
2
|
+
======
|
3
|
+
|
4
|
+
Brotli is a generic-purpose lossless compression algorithm that compresses data
|
5
|
+
using a combination of a modern variant of the LZ77 algorithm, Huffman coding
|
6
|
+
and 2nd order context modeling, with a compression ratio comparable to the best
|
7
|
+
currently available general-purpose compression methods. It is similar in speed
|
8
|
+
with deflate but offers more dense compression.
|
9
|
+
|
10
|
+
The specification of the Brotli Compressed Data Format is defined in the
|
11
|
+
following internet draft:
|
12
|
+
http://www.ietf.org/id/draft-alakuijala-brotli
|
13
|
+
|
14
|
+
Brotli is open-sourced under the Apache License, Version 2.0, see the LICENSE
|
15
|
+
file.
|
16
|
+
|
17
|
+
Brotli mailing list:
|
18
|
+
https://groups.google.com/forum/#!forum/brotli
|
@@ -0,0 +1,55 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
*/
|
15
|
+
|
16
|
+
/* Bit reading helpers */
|
17
|
+
|
18
|
+
#include <stdlib.h>
|
19
|
+
|
20
|
+
#include "./bit_reader.h"
|
21
|
+
#include "./port.h"
|
22
|
+
|
23
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
24
|
+
extern "C" {
|
25
|
+
#endif
|
26
|
+
|
27
|
+
/* Puts the bit reader into its initial, empty state and attaches the input
   source. No input is pulled here: the byte buffer is marked empty and the
   bit window is cleared. */
void BrotliInitBitReader(BrotliBitReader* const br, BrotliInput input) {
  BROTLI_DCHECK(br != NULL);

  /* Byte-buffer side: remember the source, start reading at the front of
     buf_ with nothing available and the stream not yet finished. */
  br->input_ = input;
  br->next_in = br->buf_;
  br->avail_in = 0;
  br->eos_ = 0;

  /* Bit-window side: no bits have been pre-fetched. */
  br->val_ = 0;
  br->bit_pos_ = 0;
}
|
37
|
+
|
38
|
+
|
39
|
+
/* Loads sizeof(br->val_) bytes from next_in into the bit window, least
   significant byte first, consuming them from the byte buffer.
   The function does not check avail_in itself; the caller has to make sure
   that many bytes are buffered. */
void BrotliWarmupBitReader(BrotliBitReader* const br) {
  size_t shift;
  br->val_ = 0;
  for (shift = 0; shift < 8 * sizeof(br->val_); shift += 8) {
    /* Widen the byte to the window type before shifting it into place. */
#if (BROTLI_64_BITS_LITTLE_ENDIAN)
    const uint64_t next_byte = *br->next_in++;
#else
    const uint32_t next_byte = *br->next_in++;
#endif
    br->val_ |= next_byte << shift;
    --br->avail_in;
  }
}
|
52
|
+
|
53
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
54
|
+
} /* extern "C" */
|
55
|
+
#endif
|
@@ -0,0 +1,256 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
*/
|
15
|
+
|
16
|
+
/* Bit reading helpers */
|
17
|
+
|
18
|
+
#ifndef BROTLI_DEC_BIT_READER_H_
|
19
|
+
#define BROTLI_DEC_BIT_READER_H_
|
20
|
+
|
21
|
+
#include <string.h>
|
22
|
+
#include "./port.h"
|
23
|
+
#include "./streams.h"
|
24
|
+
#include "./types.h"
|
25
|
+
|
26
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
27
|
+
extern "C" {
|
28
|
+
#endif
|
29
|
+
|
30
|
+
#define BROTLI_MAX_NUM_BIT_READ 25
|
31
|
+
#define BROTLI_READ_SIZE 1024
|
32
|
+
#define BROTLI_IMPLICIT_ZEROES 128
|
33
|
+
#define BROTLI_IBUF_SIZE (BROTLI_READ_SIZE + BROTLI_IMPLICIT_ZEROES)
|
34
|
+
#define BROTLI_IBUF_MASK (BROTLI_READ_SIZE - 1)
|
35
|
+
|
36
|
+
/* Masking with this expression turns to a single "Unsigned Bit Field Extract"
|
37
|
+
UBFX instruction on ARM. */
|
38
|
+
static BROTLI_INLINE uint32_t BitMask(int n) {
  /* Returns a value whose n low bits are set and all others clear.
     n must stay below 32: shifting a 32-bit value by its full width is
     undefined in C. */
  const uint32_t all_ones = 0xffffffff;
  return ~(all_ones << n);
}
|
39
|
+
|
40
|
+
typedef struct {
|
41
|
+
#if (BROTLI_64_BITS_LITTLE_ENDIAN)
|
42
|
+
uint64_t val_; /* pre-fetched bits */
|
43
|
+
#else
|
44
|
+
uint32_t val_; /* pre-fetched bits */
|
45
|
+
#endif
|
46
|
+
uint32_t bit_pos_; /* current bit-reading position in val_ */
|
47
|
+
uint8_t* next_in; /* the byte we're reading from */
|
48
|
+
uint32_t avail_in;
|
49
|
+
int eos_; /* input stream is finished */
|
50
|
+
BrotliInput input_; /* input callback */
|
51
|
+
|
52
|
+
/* Input byte buffer, consist of a ringbuffer and a "slack" region where */
|
53
|
+
/* bytes from the start of the ringbuffer are copied. */
|
54
|
+
uint8_t buf_[BROTLI_IBUF_SIZE];
|
55
|
+
} BrotliBitReader;
|
56
|
+
|
57
|
+
/* Initializes the bitreader fields. After this, BrotliReadInput then
|
58
|
+
BrotliWarmupBitReader must be used. */
|
59
|
+
void BrotliInitBitReader(BrotliBitReader* const br, BrotliInput input);
|
60
|
+
|
61
|
+
/* Initializes bit reading and bit position with the first input data available.
|
62
|
+
Requires that there is enough input available (BrotliCheckInputAmount). */
|
63
|
+
void BrotliWarmupBitReader(BrotliBitReader* const br);
|
64
|
+
|
65
|
+
/* Pulls data from the input into the read buffer.
|
66
|
+
|
67
|
+
Returns 0 if one of:
|
68
|
+
- the input callback returned an error, or
|
69
|
+
- there is no more input and the position is past the end of the stream.
|
70
|
+
- finish is false and less than BROTLI_READ_SIZE are available - a next call
|
71
|
+
when more data is available makes it continue including the partially read
|
72
|
+
data
|
73
|
+
|
74
|
+
If finish is true and the end of the stream is reached,
|
75
|
+
BROTLI_IMPLICIT_ZEROES additional zero bytes are copied to the ringbuffer.
|
76
|
+
*/
|
77
|
+
static BROTLI_INLINE int BrotliReadInput(
|
78
|
+
BrotliBitReader* const br, int finish) {
|
79
|
+
if (PREDICT_FALSE(br->eos_)) {
|
80
|
+
return 0;
|
81
|
+
} else {
|
82
|
+
size_t i;
|
83
|
+
int bytes_read;
|
84
|
+
if (br->next_in != br->buf_) {
|
85
|
+
for (i = 0; i < br->avail_in; i++) {
|
86
|
+
br->buf_[i] = br->next_in[i];
|
87
|
+
}
|
88
|
+
br->next_in = br->buf_;
|
89
|
+
}
|
90
|
+
bytes_read = BrotliRead(br->input_, br->next_in + br->avail_in,
|
91
|
+
(size_t)(BROTLI_READ_SIZE - br->avail_in));
|
92
|
+
if (bytes_read < 0) {
|
93
|
+
return 0;
|
94
|
+
}
|
95
|
+
br->avail_in += (uint32_t)bytes_read;
|
96
|
+
if (br->avail_in < BROTLI_READ_SIZE) {
|
97
|
+
if (!finish) {
|
98
|
+
return 0;
|
99
|
+
}
|
100
|
+
br->eos_ = 1;
|
101
|
+
/* Store BROTLI_IMPLICIT_ZEROES bytes of zero after the stream end. */
|
102
|
+
memset(br->next_in + br->avail_in, 0, BROTLI_IMPLICIT_ZEROES);
|
103
|
+
br->avail_in += BROTLI_IMPLICIT_ZEROES;
|
104
|
+
}
|
105
|
+
return 1;
|
106
|
+
}
|
107
|
+
}
|
108
|
+
|
109
|
+
/* Returns amount of unread bytes the bit reader still has buffered from the
|
110
|
+
BrotliInput, including whole bytes in br->val_. */
|
111
|
+
static BROTLI_INLINE size_t BrotliGetRemainingBytes(BrotliBitReader* br) {
  /* Bytes of the bit window that have already been consumed. */
  const size_t consumed_in_window = br->bit_pos_ >> 3;
  /* Total capacity of the bit window in bytes. */
  const size_t window_bytes = sizeof(br->val_);
  return br->avail_in + window_bytes - consumed_in_window;
}
|
114
|
+
|
115
|
+
/* Checks if there is at least num bytes left in the input ringbuffer (excluding
|
116
|
+
the bits remaining in br->val_). The maximum value for num is
|
117
|
+
BROTLI_IMPLICIT_ZEROES bytes. */
|
118
|
+
static BROTLI_INLINE int BrotliCheckInputAmount(
|
119
|
+
BrotliBitReader* const br, size_t num) {
|
120
|
+
return br->avail_in >= num;
|
121
|
+
}
|
122
|
+
|
123
|
+
/* Guarantees that there are at least n_bits in the buffer.
|
124
|
+
n_bits should be in the range [1..24] */
|
125
|
+
static BROTLI_INLINE void BrotliFillBitWindow(
|
126
|
+
BrotliBitReader* const br, int n_bits) {
|
127
|
+
#if (BROTLI_64_BITS_LITTLE_ENDIAN)
|
128
|
+
if (IS_CONSTANT(n_bits) && n_bits <= 8) {
|
129
|
+
if (br->bit_pos_ >= 56) {
|
130
|
+
br->val_ >>= 56;
|
131
|
+
br->bit_pos_ ^= 56; /* here same as -= 56 because of the if condition */
|
132
|
+
br->val_ |= (*(const uint64_t*)(br->next_in)) << 8;
|
133
|
+
br->avail_in -= 7;
|
134
|
+
br->next_in += 7;
|
135
|
+
}
|
136
|
+
} else if (IS_CONSTANT(n_bits) && n_bits <= 16) {
|
137
|
+
if (br->bit_pos_ >= 48) {
|
138
|
+
br->val_ >>= 48;
|
139
|
+
br->bit_pos_ ^= 48; /* here same as -= 48 because of the if condition */
|
140
|
+
br->val_ |= (*(const uint64_t*)(br->next_in)) << 16;
|
141
|
+
br->avail_in -= 6;
|
142
|
+
br->next_in += 6;
|
143
|
+
}
|
144
|
+
} else {
|
145
|
+
if (br->bit_pos_ >= 32) {
|
146
|
+
br->val_ >>= 32;
|
147
|
+
br->bit_pos_ ^= 32; /* here same as -= 32 because of the if condition */
|
148
|
+
br->val_ |= ((uint64_t)(*(const uint32_t*)(br->next_in))) << 32;
|
149
|
+
br->avail_in -= 4;
|
150
|
+
br->next_in += 4;
|
151
|
+
}
|
152
|
+
}
|
153
|
+
#elif (BROTLI_LITTLE_ENDIAN)
|
154
|
+
if (IS_CONSTANT(n_bits) && n_bits <= 8) {
|
155
|
+
if (br->bit_pos_ >= 24) {
|
156
|
+
br->val_ >>= 24;
|
157
|
+
br->bit_pos_ ^= 24; /* here same as -= 24 because of the if condition */
|
158
|
+
br->val_ |= (*(const uint32_t*)(br->next_in)) << 8;
|
159
|
+
br->avail_in -= 3;
|
160
|
+
br->next_in += 3;
|
161
|
+
}
|
162
|
+
} else {
|
163
|
+
if (br->bit_pos_ >= 16) {
|
164
|
+
br->val_ >>= 16;
|
165
|
+
br->bit_pos_ ^= 16; /* here same as -= 16 because of the if condition */
|
166
|
+
br->val_ |= ((uint32_t)(*(const uint16_t*)(br->next_in))) << 16;
|
167
|
+
br->avail_in -= 2;
|
168
|
+
br->next_in += 2;
|
169
|
+
}
|
170
|
+
if (!IS_CONSTANT(n_bits) || (n_bits > 16)) {
|
171
|
+
if (br->bit_pos_ >= 8) {
|
172
|
+
br->val_ >>= 8;
|
173
|
+
br->bit_pos_ ^= 8; /* here same as -= 8 because of the if condition */
|
174
|
+
br->val_ |= ((uint32_t)*br->next_in) << 24;
|
175
|
+
--br->avail_in;
|
176
|
+
++br->next_in;
|
177
|
+
}
|
178
|
+
}
|
179
|
+
}
|
180
|
+
#else
|
181
|
+
while (br->bit_pos_ >= 8) {
|
182
|
+
br->val_ >>= 8;
|
183
|
+
br->val_ |= ((uint32_t)*br->next_in) << 24;
|
184
|
+
br->bit_pos_ -= 8;
|
185
|
+
--br->avail_in;
|
186
|
+
++br->next_in;
|
187
|
+
}
|
188
|
+
#endif
|
189
|
+
}
|
190
|
+
|
191
|
+
/* Like BrotliGetBits, but does not mask the result, it is only guaranteed
|
192
|
+
that it has minimum n_bits. */
|
193
|
+
static BROTLI_INLINE uint32_t BrotliGetBitsUnmasked(
    BrotliBitReader* const br, int n_bits) {
  uint32_t window;
  /* Refill first so the bits at bit_pos_ are backed by real input. */
  BrotliFillBitWindow(br, n_bits);
  /* Bits above n_bits are left in place; the caller must ignore them. */
  window = (uint32_t)(br->val_ >> br->bit_pos_);
  return window;
}
|
198
|
+
|
199
|
+
/* Returns the specified number of bits from br without advancing bit pos. */
|
200
|
+
static BROTLI_INLINE uint32_t BrotliGetBits(
    BrotliBitReader* const br, int n_bits) {
  uint32_t window;
  BrotliFillBitWindow(br, n_bits);
  /* Peek only: bit_pos_ is left where it is. */
  window = (uint32_t)(br->val_ >> br->bit_pos_);
  return window & BitMask(n_bits);
}
|
205
|
+
|
206
|
+
/* Advances the bit pos by n_bits. */
|
207
|
+
static BROTLI_INLINE void BrotliDropBits(
|
208
|
+
BrotliBitReader* const br, int n_bits) {
|
209
|
+
br->bit_pos_ += (uint32_t)n_bits;
|
210
|
+
}
|
211
|
+
|
212
|
+
/* Reads the specified number of bits from br and advances the bit pos. */
|
213
|
+
static BROTLI_INLINE uint32_t BrotliReadBits(
    BrotliBitReader* const br, int n_bits) {
  uint32_t bits;
  BrotliFillBitWindow(br, n_bits);
  /* Extract the low n_bits bits at the current position ... */
  bits = (uint32_t)(br->val_ >> br->bit_pos_);
  bits &= BitMask(n_bits);
#ifdef BROTLI_DECODE_DEBUG
  printf("[BrotliReadBits] %d %d %d val: %6x\n",
         (int)br->avail_in, (int)br->bit_pos_, n_bits, bits);
#endif
  /* ... and then mark them as consumed. */
  br->bit_pos_ += (uint32_t)n_bits;
  return bits;
}
|
225
|
+
|
226
|
+
/* Advances the bit reader position to the next byte boundary and verifies
|
227
|
+
that any skipped bits are set to zero. */
|
228
|
+
static BROTLI_INLINE int BrotliJumpToByteBoundary(BrotliBitReader* br) {
  /* Number of bits between the current position and the next multiple of 8;
     zero when the position is already byte aligned. */
  const int pad_count = (int)((8u - (br->bit_pos_ & 7u)) & 7u);
  /* Consume the padding; the jump is valid only if every padding bit is 0. */
  return BrotliReadBits(br, pad_count) == 0;
}
|
233
|
+
|
234
|
+
/* Copies remaining input bytes stored in the bit reader to the output. Value
|
235
|
+
num may not be larger than BrotliGetRemainingBytes. The bit reader must be
|
236
|
+
warmed up again after this. */
|
237
|
+
static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest,
|
238
|
+
BrotliBitReader* br, size_t num) {
|
239
|
+
while (br->bit_pos_ + 8 <= (BROTLI_64_BITS_LITTLE_ENDIAN ? 64 : 32)
|
240
|
+
&& num > 0) {
|
241
|
+
*dest = (uint8_t)(br->val_ >> br->bit_pos_);
|
242
|
+
br->bit_pos_ += 8;
|
243
|
+
++dest;
|
244
|
+
--num;
|
245
|
+
}
|
246
|
+
memcpy(dest, br->next_in, num);
|
247
|
+
br->avail_in -= (uint32_t)num;
|
248
|
+
br->next_in += num;
|
249
|
+
br->bit_pos_ = 0;
|
250
|
+
}
|
251
|
+
|
252
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
253
|
+
} /* extern "C" */
|
254
|
+
#endif
|
255
|
+
|
256
|
+
#endif /* BROTLI_DEC_BIT_READER_H_ */
|
@@ -0,0 +1,260 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
*/
|
15
|
+
|
16
|
+
/* Lookup table to map the previous two bytes to a context id.
|
17
|
+
|
18
|
+
There are four different context modeling modes defined here:
|
19
|
+
CONTEXT_LSB6: context id is the least significant 6 bits of the last byte,
|
20
|
+
CONTEXT_MSB6: context id is the most significant 6 bits of the last byte,
|
21
|
+
CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text,
|
22
|
+
CONTEXT_SIGNED: second-order context model tuned for signed integers.
|
23
|
+
|
24
|
+
The context id for the UTF8 context model is calculated as follows. If p1
|
25
|
+
and p2 are the previous two bytes, we calculate the context as
|
26
|
+
|
27
|
+
context = kContextLookup[p1] | kContextLookup[p2 + 256].
|
28
|
+
|
29
|
+
If the previous two bytes are ASCII characters (i.e. < 128), this will be
|
30
|
+
equivalent to
|
31
|
+
|
32
|
+
context = 4 * context1(p1) + context2(p2),
|
33
|
+
|
34
|
+
where context1 is based on the previous byte in the following way:
|
35
|
+
|
36
|
+
0 : non-ASCII control
|
37
|
+
1 : \t, \n, \r
|
38
|
+
2 : space
|
39
|
+
3 : other punctuation
|
40
|
+
4 : " '
|
41
|
+
5 : %
|
42
|
+
6 : ( < [ {
|
43
|
+
7 : ) > ] }
|
44
|
+
8 : , ; :
|
45
|
+
9 : .
|
46
|
+
10 : =
|
47
|
+
11 : number
|
48
|
+
12 : upper-case vowel
|
49
|
+
13 : upper-case consonant
|
50
|
+
14 : lower-case vowel
|
51
|
+
15 : lower-case consonant
|
52
|
+
|
53
|
+
and context2 is based on the second last byte:
|
54
|
+
|
55
|
+
0 : control, space
|
56
|
+
1 : punctuation
|
57
|
+
2 : upper-case letter, number
|
58
|
+
3 : lower-case letter
|
59
|
+
|
60
|
+
If the last byte is ASCII, and the second last byte is not (in a valid UTF8
|
61
|
+
stream it will be a continuation byte, value between 128 and 191), the
|
62
|
+
context is the same as if the second last byte was an ASCII control or space.
|
63
|
+
|
64
|
+
If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
|
65
|
+
be a continuation byte and the context id is 2 or 3 depending on the LSB of
|
66
|
+
the last byte and to a lesser extent on the second last byte if it is ASCII.
|
67
|
+
|
68
|
+
If the last byte is a UTF8 continuation byte, the second last byte can be:
|
69
|
+
- continuation byte: the next byte is probably ASCII or lead byte (assuming
|
70
|
+
4-byte UTF8 characters are rare) and the context id is 0 or 1.
|
71
|
+
- lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
|
72
|
+
- lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
|
73
|
+
|
74
|
+
The possible value combinations of the previous two bytes, the range of
|
75
|
+
context ids and the type of the next byte is summarized in the table below:
|
76
|
+
|
77
|
+
|--------\-----------------------------------------------------------------|
|
78
|
+
| \ Last byte |
|
79
|
+
| Second \---------------------------------------------------------------|
|
80
|
+
| last byte \ ASCII | cont. byte | lead byte |
|
81
|
+
| \ (0-127) | (128-191) | (192-) |
|
82
|
+
|=============|===================|=====================|==================|
|
83
|
+
| ASCII | next: ASCII/lead | not valid | next: cont. |
|
84
|
+
| (0-127) | context: 4 - 63 | | context: 2 - 3 |
|
85
|
+
|-------------|-------------------|---------------------|------------------|
|
86
|
+
| cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
|
87
|
+
| (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
|
88
|
+
|-------------|-------------------|---------------------|------------------|
|
89
|
+
| lead byte | not valid | next: ASCII/lead | not valid |
|
90
|
+
| (192-207) | | context: 0 - 1 | |
|
91
|
+
|-------------|-------------------|---------------------|------------------|
|
92
|
+
| lead byte | not valid | next: cont. | not valid |
|
93
|
+
| (208-) | | context: 2 - 3 | |
|
94
|
+
|-------------|-------------------|---------------------|------------------|
|
95
|
+
|
96
|
+
The context id for the signed context mode is calculated as:
|
97
|
+
|
98
|
+
context = (kContextLookup[512 + p1] << 3) | kContextLookup[512 + p2].
|
99
|
+
|
100
|
+
For any context modeling modes, the context ids can be calculated by |-ing
|
101
|
+
together two lookups from one table using context model dependent offsets:
|
102
|
+
|
103
|
+
context = kContextLookup[offset1 + p1] | kContextLookup[offset2 + p2].
|
104
|
+
|
105
|
+
where offset1 and offset2 are dependent on the context mode.
|
106
|
+
*/
|
107
|
+
|
108
|
+
#ifndef BROTLI_DEC_CONTEXT_H_
|
109
|
+
#define BROTLI_DEC_CONTEXT_H_
|
110
|
+
|
111
|
+
#include "./types.h"
|
112
|
+
|
113
|
+
/* Context modeling modes. */
enum ContextType {
  CONTEXT_LSB6 = 0,   /* least significant 6 bits of the last byte */
  CONTEXT_MSB6 = 1,   /* most significant 6 bits of the last byte */
  CONTEXT_UTF8 = 2,   /* second-order model tuned for UTF8-encoded text */
  CONTEXT_SIGNED = 3  /* second-order model tuned for signed integers */
};
|
119
|
+
|
120
|
+
/* Common context lookup table for all context modes. */
|
121
|
+
/* Seven consecutive 256-entry tables, each indexed by a byte value. The
   offsets in the section comments below are the positions at which each
   table starts. */
static const uint8_t kContextLookup[1792] = {
  /* ---- offset 0: CONTEXT_UTF8, last byte ---- */
  /* bytes 0-127 (ASCII) */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  4,  4,  0,  0,  4,  0,  0,
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
   8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
  44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
  12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
  52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
  12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
  60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12,  0,
  /* bytes 128-191 (UTF8 continuation bytes) */
  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
  /* bytes 192-255 (UTF8 lead bytes) */
  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,

  /* ---- offset 256: CONTEXT_UTF8, second last byte ---- */
  /* bytes 0-127 (ASCII) */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
  1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
  /* bytes 128-223 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* bytes 224-255 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

  /* ---- offset 512: CONTEXT_SIGNED, second last byte ---- */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,

  /* ---- offset 768: CONTEXT_SIGNED, last byte ----
     (the values of the previous table shifted left by 3 bits) */
   0,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
  48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56,

  /* ---- offset 1024: CONTEXT_LSB6, last byte ---- */
   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,

  /* ---- offset 1280: CONTEXT_MSB6, last byte ---- */
   0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
   4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,
   8,  8,  8,  8,  9,  9,  9,  9, 10, 10, 10, 10, 11, 11, 11, 11,
  12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
  16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
  20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
  24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
  28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
  32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
  36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
  40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
  44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
  48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
  52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
  56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
  60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,

  /* ---- offset 1536: CONTEXT_{M,L}SB6, second last byte (all zero) ---- */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
|
248
|
+
|
249
|
+
/* Start offsets into kContextLookup, two per context mode, listed in the
   order of the ContextType values: first the offset of the table indexed by
   the last byte, then the offset of the table indexed by the second last
   byte.
   NOTE(review): presumably read as [2 * mode] and [2 * mode + 1] - confirm
   against the decoder. */
static const int kContextLookupOffsets[8] = {
  1024, 1536,  /* CONTEXT_LSB6 */
  1280, 1536,  /* CONTEXT_MSB6 */
  0, 256,      /* CONTEXT_UTF8 */
  768, 512     /* CONTEXT_SIGNED */
};
|
259
|
+
|
260
|
+
#endif /* BROTLI_DEC_CONTEXT_H_ */
|