extbrotli 0.0.1.PROTOTYPE
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +28 -0
- data/README.md +67 -0
- data/Rakefile +158 -0
- data/contrib/brotli/LICENSE +202 -0
- data/contrib/brotli/README.md +18 -0
- data/contrib/brotli/dec/bit_reader.c +55 -0
- data/contrib/brotli/dec/bit_reader.h +256 -0
- data/contrib/brotli/dec/context.h +260 -0
- data/contrib/brotli/dec/decode.c +1573 -0
- data/contrib/brotli/dec/decode.h +160 -0
- data/contrib/brotli/dec/dictionary.h +9494 -0
- data/contrib/brotli/dec/huffman.c +325 -0
- data/contrib/brotli/dec/huffman.h +77 -0
- data/contrib/brotli/dec/port.h +148 -0
- data/contrib/brotli/dec/prefix.h +756 -0
- data/contrib/brotli/dec/state.c +149 -0
- data/contrib/brotli/dec/state.h +185 -0
- data/contrib/brotli/dec/streams.c +99 -0
- data/contrib/brotli/dec/streams.h +100 -0
- data/contrib/brotli/dec/transform.h +315 -0
- data/contrib/brotli/dec/types.h +36 -0
- data/contrib/brotli/enc/backward_references.cc +769 -0
- data/contrib/brotli/enc/backward_references.h +50 -0
- data/contrib/brotli/enc/bit_cost.h +147 -0
- data/contrib/brotli/enc/block_splitter.cc +418 -0
- data/contrib/brotli/enc/block_splitter.h +78 -0
- data/contrib/brotli/enc/brotli_bit_stream.cc +884 -0
- data/contrib/brotli/enc/brotli_bit_stream.h +149 -0
- data/contrib/brotli/enc/cluster.h +290 -0
- data/contrib/brotli/enc/command.h +140 -0
- data/contrib/brotli/enc/context.h +185 -0
- data/contrib/brotli/enc/dictionary.h +9485 -0
- data/contrib/brotli/enc/dictionary_hash.h +4125 -0
- data/contrib/brotli/enc/encode.cc +715 -0
- data/contrib/brotli/enc/encode.h +196 -0
- data/contrib/brotli/enc/encode_parallel.cc +354 -0
- data/contrib/brotli/enc/encode_parallel.h +37 -0
- data/contrib/brotli/enc/entropy_encode.cc +492 -0
- data/contrib/brotli/enc/entropy_encode.h +88 -0
- data/contrib/brotli/enc/fast_log.h +179 -0
- data/contrib/brotli/enc/find_match_length.h +87 -0
- data/contrib/brotli/enc/hash.h +686 -0
- data/contrib/brotli/enc/histogram.cc +76 -0
- data/contrib/brotli/enc/histogram.h +100 -0
- data/contrib/brotli/enc/literal_cost.cc +172 -0
- data/contrib/brotli/enc/literal_cost.h +38 -0
- data/contrib/brotli/enc/metablock.cc +544 -0
- data/contrib/brotli/enc/metablock.h +88 -0
- data/contrib/brotli/enc/port.h +151 -0
- data/contrib/brotli/enc/prefix.h +85 -0
- data/contrib/brotli/enc/ringbuffer.h +108 -0
- data/contrib/brotli/enc/static_dict.cc +441 -0
- data/contrib/brotli/enc/static_dict.h +40 -0
- data/contrib/brotli/enc/static_dict_lut.h +12063 -0
- data/contrib/brotli/enc/streams.cc +127 -0
- data/contrib/brotli/enc/streams.h +129 -0
- data/contrib/brotli/enc/transform.h +250 -0
- data/contrib/brotli/enc/write_bits.h +91 -0
- data/ext/extbrotli.cc +24 -0
- data/ext/extbrotli.h +73 -0
- data/ext/extconf.rb +35 -0
- data/ext/lldecoder.c +220 -0
- data/ext/llencoder.cc +433 -0
- data/gemstub.rb +21 -0
- data/lib/extbrotli.rb +243 -0
- data/lib/extbrotli/version.rb +3 -0
- metadata +140 -0
@@ -0,0 +1,18 @@
|
|
1
|
+
brotli
|
2
|
+
======
|
3
|
+
|
4
|
+
Brotli is a generic-purpose lossless compression algorithm that compresses data
|
5
|
+
using a combination of a modern variant of the LZ77 algorithm, Huffman coding
|
6
|
+
and 2nd order context modeling, with a compression ratio comparable to the best
|
7
|
+
currently available general-purpose compression methods. It is similar in speed
|
8
|
+
to deflate but offers denser compression.
|
9
|
+
|
10
|
+
The specification of the Brotli Compressed Data Format is defined in the
|
11
|
+
following internet draft:
|
12
|
+
http://www.ietf.org/id/draft-alakuijala-brotli
|
13
|
+
|
14
|
+
Brotli is open-sourced under the Apache License, Version 2.0, see the LICENSE
|
15
|
+
file.
|
16
|
+
|
17
|
+
Brotli mailing list:
|
18
|
+
https://groups.google.com/forum/#!forum/brotli
|
@@ -0,0 +1,55 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
*/
|
15
|
+
|
16
|
+
/* Bit reading helpers */
|
17
|
+
|
18
|
+
#include <stdlib.h>
|
19
|
+
|
20
|
+
#include "./bit_reader.h"
|
21
|
+
#include "./port.h"
|
22
|
+
|
23
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
24
|
+
extern "C" {
|
25
|
+
#endif
|
26
|
+
|
27
|
+
/* Puts |br| into its empty starting state and binds it to |input|.
   No input is consumed here: the bit window, the bit position and the count
   of buffered bytes all start at zero, the end-of-stream flag is cleared and
   the read cursor is placed on the first byte of the internal buffer. */
void BrotliInitBitReader(BrotliBitReader* const br, BrotliInput input) {
  BROTLI_DCHECK(br != NULL);

  /* Where the compressed bytes will come from. */
  br->input_ = input;
  br->eos_ = 0;

  /* Nothing buffered yet; the cursor sits at the start of the byte buffer. */
  br->next_in = br->buf_;
  br->avail_in = 0;

  /* Nothing pre-fetched yet. */
  br->bit_pos_ = 0;
  br->val_ = 0;
}
|
37
|
+
|
38
|
+
|
39
|
+
/* Primes the bit window from the byte buffer.
   Consumes sizeof(br->val_) bytes starting at br->next_in and packs them into
   br->val_ in little-endian order (the first byte ends up in the lowest eight
   bits). br->next_in and br->avail_in are advanced by one per byte consumed.
   No bounds check is done here, so the caller must already have buffered
   enough input. */
void BrotliWarmupBitReader(BrotliBitReader* const br) {
  const size_t window_bits = 8 * sizeof(br->val_);
  size_t shift = 0;

  br->val_ = 0;
  while (shift < window_bits) {
    /* Widen the byte to the window type before shifting it into place. */
#if (BROTLI_64_BITS_LITTLE_ENDIAN)
    br->val_ |= ((uint64_t)*br->next_in) << shift;
#else
    br->val_ |= ((uint32_t)*br->next_in) << shift;
#endif
    shift += 8;
    ++br->next_in;
    --br->avail_in;
  }
}
|
52
|
+
|
53
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
54
|
+
} /* extern "C" */
|
55
|
+
#endif
|
@@ -0,0 +1,256 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
*/
|
15
|
+
|
16
|
+
/* Bit reading helpers */
|
17
|
+
|
18
|
+
#ifndef BROTLI_DEC_BIT_READER_H_
|
19
|
+
#define BROTLI_DEC_BIT_READER_H_
|
20
|
+
|
21
|
+
#include <string.h>
|
22
|
+
#include "./port.h"
|
23
|
+
#include "./streams.h"
|
24
|
+
#include "./types.h"
|
25
|
+
|
26
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
27
|
+
extern "C" {
|
28
|
+
#endif
|
29
|
+
|
30
|
+
/* NOTE(review): presumably the largest bit count a single read may request;
   it is not referenced in this header -- confirm against the decoder. */
#define BROTLI_MAX_NUM_BIT_READ 25

/* Number of bytes BrotliReadInput tries to have buffered after a refill. */
#define BROTLI_READ_SIZE 1024

/* Number of zero bytes appended to the buffer once the stream has ended. */
#define BROTLI_IMPLICIT_ZEROES 128

/* Capacity of the reader's byte buffer: a full read plus the zero padding. */
#define BROTLI_IBUF_SIZE (BROTLI_READ_SIZE + BROTLI_IMPLICIT_ZEROES)

/* BROTLI_READ_SIZE - 1; usable as an index mask because the read size is a
   power of two. */
#define BROTLI_IBUF_MASK (BROTLI_READ_SIZE - 1)
|
35
|
+
|
36
|
+
/* Masking with this expression turns to a single "Unsigned Bit Field Extract"
|
37
|
+
UBFX instruction on ARM. */
|
38
|
+
static BROTLI_INLINE uint32_t BitMask(int n) { return ~((0xffffffff) << n); }
|
39
|
+
|
40
|
+
typedef struct {
|
41
|
+
#if (BROTLI_64_BITS_LITTLE_ENDIAN)
|
42
|
+
uint64_t val_; /* pre-fetched bits */
|
43
|
+
#else
|
44
|
+
uint32_t val_; /* pre-fetched bits */
|
45
|
+
#endif
|
46
|
+
uint32_t bit_pos_; /* current bit-reading position in val_ */
|
47
|
+
uint8_t* next_in; /* the byte we're reading from */
|
48
|
+
uint32_t avail_in;
|
49
|
+
int eos_; /* input stream is finished */
|
50
|
+
BrotliInput input_; /* input callback */
|
51
|
+
|
52
|
+
/* Input byte buffer, consist of a ringbuffer and a "slack" region where */
|
53
|
+
/* bytes from the start of the ringbuffer are copied. */
|
54
|
+
uint8_t buf_[BROTLI_IBUF_SIZE];
|
55
|
+
} BrotliBitReader;
|
56
|
+
|
57
|
+
/* Initializes the bitreader fields. After this, BrotliReadInput then
|
58
|
+
BrotliWarmupBitReader must be used. */
|
59
|
+
void BrotliInitBitReader(BrotliBitReader* const br, BrotliInput input);
|
60
|
+
|
61
|
+
/* Initializes bit reading and bit position with the first input data available.
|
62
|
+
Requires that there is enough input available (BrotliCheckInputAmount). */
|
63
|
+
void BrotliWarmupBitReader(BrotliBitReader* const br);
|
64
|
+
|
65
|
+
/* Pulls data from the input to the the read buffer.
|
66
|
+
|
67
|
+
Returns 0 if one of:
|
68
|
+
- the input callback returned an error, or
|
69
|
+
- there is no more input and the position is past the end of the stream.
|
70
|
+
- finish is false and less than BROTLI_READ_SIZE are available - a next call
|
71
|
+
when more data is available makes it continue including the partially read
|
72
|
+
data
|
73
|
+
|
74
|
+
If finish is true and the end of the stream is reached,
|
75
|
+
BROTLI_IMPLICIT_ZEROES additional zero bytes are copied to the ringbuffer.
|
76
|
+
*/
|
77
|
+
static BROTLI_INLINE int BrotliReadInput(
|
78
|
+
BrotliBitReader* const br, int finish) {
|
79
|
+
if (PREDICT_FALSE(br->eos_)) {
|
80
|
+
return 0;
|
81
|
+
} else {
|
82
|
+
size_t i;
|
83
|
+
int bytes_read;
|
84
|
+
if (br->next_in != br->buf_) {
|
85
|
+
for (i = 0; i < br->avail_in; i++) {
|
86
|
+
br->buf_[i] = br->next_in[i];
|
87
|
+
}
|
88
|
+
br->next_in = br->buf_;
|
89
|
+
}
|
90
|
+
bytes_read = BrotliRead(br->input_, br->next_in + br->avail_in,
|
91
|
+
(size_t)(BROTLI_READ_SIZE - br->avail_in));
|
92
|
+
if (bytes_read < 0) {
|
93
|
+
return 0;
|
94
|
+
}
|
95
|
+
br->avail_in += (uint32_t)bytes_read;
|
96
|
+
if (br->avail_in < BROTLI_READ_SIZE) {
|
97
|
+
if (!finish) {
|
98
|
+
return 0;
|
99
|
+
}
|
100
|
+
br->eos_ = 1;
|
101
|
+
/* Store BROTLI_IMPLICIT_ZEROES bytes of zero after the stream end. */
|
102
|
+
memset(br->next_in + br->avail_in, 0, BROTLI_IMPLICIT_ZEROES);
|
103
|
+
br->avail_in += BROTLI_IMPLICIT_ZEROES;
|
104
|
+
}
|
105
|
+
return 1;
|
106
|
+
}
|
107
|
+
}
|
108
|
+
|
109
|
+
/* Returns amount of unread bytes the bit reader still has buffered from the
|
110
|
+
BrotliInput, including whole bytes in br->val_. */
|
111
|
+
static BROTLI_INLINE size_t BrotliGetRemainingBytes(BrotliBitReader* br) {
|
112
|
+
return br->avail_in + sizeof(br->val_) - (br->bit_pos_ >> 3);
|
113
|
+
}
|
114
|
+
|
115
|
+
/* Checks if there is at least num bytes left in the input ringbuffer (excluding
|
116
|
+
the bits remaining in br->val_). The maximum value for num is
|
117
|
+
BROTLI_IMPLICIT_ZEROES bytes. */
|
118
|
+
static BROTLI_INLINE int BrotliCheckInputAmount(
|
119
|
+
BrotliBitReader* const br, size_t num) {
|
120
|
+
return br->avail_in >= num;
|
121
|
+
}
|
122
|
+
|
123
|
+
/* Guarantees that there are at least n_bits in the buffer.
|
124
|
+
n_bits should be in the range [1..24] */
|
125
|
+
static BROTLI_INLINE void BrotliFillBitWindow(
|
126
|
+
BrotliBitReader* const br, int n_bits) {
|
127
|
+
#if (BROTLI_64_BITS_LITTLE_ENDIAN)
|
128
|
+
if (IS_CONSTANT(n_bits) && n_bits <= 8) {
|
129
|
+
if (br->bit_pos_ >= 56) {
|
130
|
+
br->val_ >>= 56;
|
131
|
+
br->bit_pos_ ^= 56; /* here same as -= 56 because of the if condition */
|
132
|
+
br->val_ |= (*(const uint64_t*)(br->next_in)) << 8;
|
133
|
+
br->avail_in -= 7;
|
134
|
+
br->next_in += 7;
|
135
|
+
}
|
136
|
+
} else if (IS_CONSTANT(n_bits) && n_bits <= 16) {
|
137
|
+
if (br->bit_pos_ >= 48) {
|
138
|
+
br->val_ >>= 48;
|
139
|
+
br->bit_pos_ ^= 48; /* here same as -= 48 because of the if condition */
|
140
|
+
br->val_ |= (*(const uint64_t*)(br->next_in)) << 16;
|
141
|
+
br->avail_in -= 6;
|
142
|
+
br->next_in += 6;
|
143
|
+
}
|
144
|
+
} else {
|
145
|
+
if (br->bit_pos_ >= 32) {
|
146
|
+
br->val_ >>= 32;
|
147
|
+
br->bit_pos_ ^= 32; /* here same as -= 32 because of the if condition */
|
148
|
+
br->val_ |= ((uint64_t)(*(const uint32_t*)(br->next_in))) << 32;
|
149
|
+
br->avail_in -= 4;
|
150
|
+
br->next_in += 4;
|
151
|
+
}
|
152
|
+
}
|
153
|
+
#elif (BROTLI_LITTLE_ENDIAN)
|
154
|
+
if (IS_CONSTANT(n_bits) && n_bits <= 8) {
|
155
|
+
if (br->bit_pos_ >= 24) {
|
156
|
+
br->val_ >>= 24;
|
157
|
+
br->bit_pos_ ^= 24; /* here same as -= 24 because of the if condition */
|
158
|
+
br->val_ |= (*(const uint32_t*)(br->next_in)) << 8;
|
159
|
+
br->avail_in -= 3;
|
160
|
+
br->next_in += 3;
|
161
|
+
}
|
162
|
+
} else {
|
163
|
+
if (br->bit_pos_ >= 16) {
|
164
|
+
br->val_ >>= 16;
|
165
|
+
br->bit_pos_ ^= 16; /* here same as -= 16 because of the if condition */
|
166
|
+
br->val_ |= ((uint32_t)(*(const uint16_t*)(br->next_in))) << 16;
|
167
|
+
br->avail_in -= 2;
|
168
|
+
br->next_in += 2;
|
169
|
+
}
|
170
|
+
if (!IS_CONSTANT(n_bits) || (n_bits > 16)) {
|
171
|
+
if (br->bit_pos_ >= 8) {
|
172
|
+
br->val_ >>= 8;
|
173
|
+
br->bit_pos_ ^= 8; /* here same as -= 8 because of the if condition */
|
174
|
+
br->val_ |= ((uint32_t)*br->next_in) << 24;
|
175
|
+
--br->avail_in;
|
176
|
+
++br->next_in;
|
177
|
+
}
|
178
|
+
}
|
179
|
+
}
|
180
|
+
#else
|
181
|
+
while (br->bit_pos_ >= 8) {
|
182
|
+
br->val_ >>= 8;
|
183
|
+
br->val_ |= ((uint32_t)*br->next_in) << 24;
|
184
|
+
br->bit_pos_ -= 8;
|
185
|
+
--br->avail_in;
|
186
|
+
++br->next_in;
|
187
|
+
}
|
188
|
+
#endif
|
189
|
+
}
|
190
|
+
|
191
|
+
/* Like BrotliGetBits, but does not mask the result, it is only guaranteed
|
192
|
+
that it has minimum n_bits. */
|
193
|
+
static BROTLI_INLINE uint32_t BrotliGetBitsUnmasked(
|
194
|
+
BrotliBitReader* const br, int n_bits) {
|
195
|
+
BrotliFillBitWindow(br, n_bits);
|
196
|
+
return (uint32_t)(br->val_ >> br->bit_pos_);
|
197
|
+
}
|
198
|
+
|
199
|
+
/* Returns the specified number of bits from br without advancing bit pos. */
|
200
|
+
static BROTLI_INLINE uint32_t BrotliGetBits(
|
201
|
+
BrotliBitReader* const br, int n_bits) {
|
202
|
+
BrotliFillBitWindow(br, n_bits);
|
203
|
+
return (uint32_t)(br->val_ >> br->bit_pos_) & BitMask(n_bits);
|
204
|
+
}
|
205
|
+
|
206
|
+
/* Advances the bit pos by n_bits. */
|
207
|
+
static BROTLI_INLINE void BrotliDropBits(
|
208
|
+
BrotliBitReader* const br, int n_bits) {
|
209
|
+
br->bit_pos_ += (uint32_t)n_bits;
|
210
|
+
}
|
211
|
+
|
212
|
+
/* Reads the specified number of bits from br and advances the bit pos. */
|
213
|
+
static BROTLI_INLINE uint32_t BrotliReadBits(
|
214
|
+
BrotliBitReader* const br, int n_bits) {
|
215
|
+
uint32_t val;
|
216
|
+
BrotliFillBitWindow(br, n_bits);
|
217
|
+
val = (uint32_t)(br->val_ >> br->bit_pos_) & BitMask(n_bits);
|
218
|
+
#ifdef BROTLI_DECODE_DEBUG
|
219
|
+
printf("[BrotliReadBits] %d %d %d val: %6x\n",
|
220
|
+
(int)br->avail_in, (int)br->bit_pos_, n_bits, val);
|
221
|
+
#endif
|
222
|
+
br->bit_pos_ += (uint32_t)n_bits;
|
223
|
+
return val;
|
224
|
+
}
|
225
|
+
|
226
|
+
/* Advances the bit reader position to the next byte boundary and verifies
|
227
|
+
that any skipped bits are set to zero. */
|
228
|
+
static BROTLI_INLINE int BrotliJumpToByteBoundary(BrotliBitReader* br) {
|
229
|
+
uint32_t new_bit_pos = (br->bit_pos_ + 7) & (uint32_t)(~7UL);
|
230
|
+
uint32_t pad_bits = BrotliReadBits(br, (int)(new_bit_pos - br->bit_pos_));
|
231
|
+
return pad_bits == 0;
|
232
|
+
}
|
233
|
+
|
234
|
+
/* Copies remaining input bytes stored in the bit reader to the output. Value
|
235
|
+
num may not be larger than BrotliGetRemainingBytes. The bit reader must be
|
236
|
+
warmed up again after this. */
|
237
|
+
static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest,
|
238
|
+
BrotliBitReader* br, size_t num) {
|
239
|
+
while (br->bit_pos_ + 8 <= (BROTLI_64_BITS_LITTLE_ENDIAN ? 64 : 32)
|
240
|
+
&& num > 0) {
|
241
|
+
*dest = (uint8_t)(br->val_ >> br->bit_pos_);
|
242
|
+
br->bit_pos_ += 8;
|
243
|
+
++dest;
|
244
|
+
--num;
|
245
|
+
}
|
246
|
+
memcpy(dest, br->next_in, num);
|
247
|
+
br->avail_in -= (uint32_t)num;
|
248
|
+
br->next_in += num;
|
249
|
+
br->bit_pos_ = 0;
|
250
|
+
}
|
251
|
+
|
252
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
253
|
+
} /* extern "C" */
|
254
|
+
#endif
|
255
|
+
|
256
|
+
#endif /* BROTLI_DEC_BIT_READER_H_ */
|
@@ -0,0 +1,260 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
*/
|
15
|
+
|
16
|
+
/* Lookup table to map the previous two bytes to a context id.
|
17
|
+
|
18
|
+
There are four different context modeling modes defined here:
|
19
|
+
CONTEXT_LSB6: context id is the least significant 6 bits of the last byte,
|
20
|
+
CONTEXT_MSB6: context id is the most significant 6 bits of the last byte,
|
21
|
+
CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text,
|
22
|
+
CONTEXT_SIGNED: second-order context model tuned for signed integers.
|
23
|
+
|
24
|
+
The context id for the UTF8 context model is calculated as follows. If p1
|
25
|
+
and p2 are the previous two bytes, we calculate the context as
|
26
|
+
|
27
|
+
context = kContextLookup[p1] | kContextLookup[p2 + 256].
|
28
|
+
|
29
|
+
If the previous two bytes are ASCII characters (i.e. < 128), this will be
|
30
|
+
equivalent to
|
31
|
+
|
32
|
+
context = 4 * context1(p1) + context2(p2),
|
33
|
+
|
34
|
+
where context1 is based on the previous byte in the following way:
|
35
|
+
|
36
|
+
0 : non-ASCII control
|
37
|
+
1 : \t, \n, \r
|
38
|
+
2 : space
|
39
|
+
3 : other punctuation
|
40
|
+
4 : " '
|
41
|
+
5 : %
|
42
|
+
6 : ( < [ {
|
43
|
+
7 : ) > ] }
|
44
|
+
8 : , ; :
|
45
|
+
9 : .
|
46
|
+
10 : =
|
47
|
+
11 : number
|
48
|
+
12 : upper-case vowel
|
49
|
+
13 : upper-case consonant
|
50
|
+
14 : lower-case vowel
|
51
|
+
15 : lower-case consonant
|
52
|
+
|
53
|
+
and context2 is based on the second last byte:
|
54
|
+
|
55
|
+
0 : control, space
|
56
|
+
1 : punctuation
|
57
|
+
2 : upper-case letter, number
|
58
|
+
3 : lower-case letter
|
59
|
+
|
60
|
+
If the last byte is ASCII, and the second last byte is not (in a valid UTF8
|
61
|
+
stream it will be a continuation byte, value between 128 and 191), the
|
62
|
+
context is the same as if the second last byte was an ASCII control or space.
|
63
|
+
|
64
|
+
If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
|
65
|
+
be a continuation byte and the context id is 2 or 3 depending on the LSB of
|
66
|
+
the last byte and to a lesser extent on the second last byte if it is ASCII.
|
67
|
+
|
68
|
+
If the last byte is a UTF8 continuation byte, the second last byte can be:
|
69
|
+
- continuation byte: the next byte is probably ASCII or lead byte (assuming
|
70
|
+
4-byte UTF8 characters are rare) and the context id is 0 or 1.
|
71
|
+
- lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
|
72
|
+
- lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
|
73
|
+
|
74
|
+
The possible value combinations of the previous two bytes, the range of
|
75
|
+
context ids and the type of the next byte are summarized in the table below:
|
76
|
+
|
77
|
+
|--------\-----------------------------------------------------------------|
|
78
|
+
| \ Last byte |
|
79
|
+
| Second \---------------------------------------------------------------|
|
80
|
+
| last byte \ ASCII | cont. byte | lead byte |
|
81
|
+
| \ (0-127) | (128-191) | (192-) |
|
82
|
+
|=============|===================|=====================|==================|
|
83
|
+
| ASCII | next: ASCII/lead | not valid | next: cont. |
|
84
|
+
| (0-127) | context: 4 - 63 | | context: 2 - 3 |
|
85
|
+
|-------------|-------------------|---------------------|------------------|
|
86
|
+
| cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
|
87
|
+
| (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
|
88
|
+
|-------------|-------------------|---------------------|------------------|
|
89
|
+
| lead byte | not valid | next: ASCII/lead | not valid |
|
90
|
+
| (192-207) | | context: 0 - 1 | |
|
91
|
+
|-------------|-------------------|---------------------|------------------|
|
92
|
+
| lead byte | not valid | next: cont. | not valid |
|
93
|
+
| (208-) | | context: 2 - 3 | |
|
94
|
+
|-------------|-------------------|---------------------|------------------|
|
95
|
+
|
96
|
+
The context id for the signed context mode is calculated as:
|
97
|
+
|
98
|
+
context = (kContextLookup[512 + p1] << 3) | kContextLookup[512 + p2].
|
99
|
+
|
100
|
+
For any context modeling modes, the context ids can be calculated by |-ing
|
101
|
+
together two lookups from one table using context model dependent offsets:
|
102
|
+
|
103
|
+
context = kContextLookup[offset1 + p1] | kContextLookup[offset2 + p2].
|
104
|
+
|
105
|
+
where offset1 and offset2 are dependent on the context mode.
|
106
|
+
*/
|
107
|
+
|
108
|
+
#ifndef BROTLI_DEC_CONTEXT_H_
|
109
|
+
#define BROTLI_DEC_CONTEXT_H_
|
110
|
+
|
111
|
+
#include "./types.h"
|
112
|
+
|
113
|
+
/* Literal context modeling modes. The enumerators are listed in the same
   order as the offset pairs in kContextLookupOffsets. */
enum ContextType {
  /* Context id is the least significant 6 bits of the last byte. */
  CONTEXT_LSB6 = 0,
  /* Context id is the most significant 6 bits of the last byte. */
  CONTEXT_MSB6 = 1,
  /* Second-order context model tuned for UTF8-encoded text. */
  CONTEXT_UTF8 = 2,
  /* Second-order context model tuned for signed integers. */
  CONTEXT_SIGNED = 3
};
|
119
|
+
|
120
|
+
/* Common context lookup table for all context modes. */
|
121
|
+
static const uint8_t kContextLookup[1792] = {
|
122
|
+
/* CONTEXT_UTF8, last byte. */
|
123
|
+
/* ASCII range. */
|
124
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
|
125
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
126
|
+
8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
|
127
|
+
44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
|
128
|
+
12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
|
129
|
+
52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
|
130
|
+
12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
|
131
|
+
60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
|
132
|
+
/* UTF8 continuation byte range. */
|
133
|
+
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
134
|
+
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
135
|
+
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
136
|
+
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
137
|
+
/* UTF8 lead byte range. */
|
138
|
+
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
139
|
+
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
140
|
+
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
141
|
+
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
142
|
+
/* CONTEXT_UTF8 second last byte. */
|
143
|
+
/* ASCII range. */
|
144
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
145
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
146
|
+
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
147
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
|
148
|
+
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
149
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
|
150
|
+
1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
151
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
|
152
|
+
/* UTF8 continuation byte range. */
|
153
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
154
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
155
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
156
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
157
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
158
|
+
/* UTF8 lead byte range. */
|
159
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
160
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
161
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
162
|
+
/* CONTEXT_SIGNED, second last byte. */
|
163
|
+
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
164
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
165
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
166
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
167
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
168
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
169
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
170
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
171
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
172
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
173
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
174
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
175
|
+
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
176
|
+
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
177
|
+
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
178
|
+
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
|
179
|
+
/* CONTEXT_SIGNED, last byte, same as the above values shifted by 3 bits. */
|
180
|
+
0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
181
|
+
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
182
|
+
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
183
|
+
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
184
|
+
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
185
|
+
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
186
|
+
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
187
|
+
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
188
|
+
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
189
|
+
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
190
|
+
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
191
|
+
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
192
|
+
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
|
193
|
+
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
|
194
|
+
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
|
195
|
+
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56,
|
196
|
+
/* CONTEXT_LSB6, last byte. */
|
197
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
198
|
+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
199
|
+
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
200
|
+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
201
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
202
|
+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
203
|
+
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
204
|
+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
205
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
206
|
+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
207
|
+
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
208
|
+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
209
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
210
|
+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
211
|
+
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
212
|
+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
213
|
+
/* CONTEXT_MSB6, last byte. */
|
214
|
+
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
|
215
|
+
4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
|
216
|
+
8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11,
|
217
|
+
12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
|
218
|
+
16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
|
219
|
+
20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
|
220
|
+
24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
|
221
|
+
28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
|
222
|
+
32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
|
223
|
+
36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
|
224
|
+
40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
|
225
|
+
44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
|
226
|
+
48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
|
227
|
+
52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
|
228
|
+
56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
|
229
|
+
60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,
|
230
|
+
/* CONTEXT_{M,L}SB6, second last byte, */
|
231
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
232
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
233
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
234
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
235
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
236
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
237
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
238
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
239
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
240
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
241
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
242
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
243
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
244
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
245
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
246
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
247
|
+
};
|
248
|
+
|
249
|
+
/* Offsets into kContextLookup, stored as one (offset1, offset2) pair per
   context mode, in ContextType order. offset1 selects the sub-table for the
   last byte (p1) and offset2 the sub-table for the second last byte (p2):

     context = kContextLookup[offset1 + p1] | kContextLookup[offset2 + p2]. */
static const int kContextLookupOffsets[8] = {
  1024, 1536,  /* CONTEXT_LSB6 */
  1280, 1536,  /* CONTEXT_MSB6 */
  0, 256,      /* CONTEXT_UTF8 */
  768, 512,    /* CONTEXT_SIGNED */
};
|
259
|
+
|
260
|
+
#endif /* BROTLI_DEC_CONTEXT_H_ */
|