brotli 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/main.yml +34 -0
- data/.github/workflows/publish.yml +34 -0
- data/Gemfile +6 -2
- data/Rakefile +18 -6
- data/bin/before_install.sh +9 -0
- data/brotli.gemspec +7 -13
- data/ext/brotli/brotli.c +209 -11
- data/ext/brotli/buffer.c +1 -7
- data/ext/brotli/buffer.h +1 -1
- data/ext/brotli/extconf.rb +45 -26
- data/lib/brotli/version.rb +1 -1
- data/smoke.sh +1 -1
- data/test/brotli_test.rb +104 -0
- data/test/brotli_writer_test.rb +36 -0
- data/test/test_helper.rb +8 -0
- data/vendor/brotli/c/common/constants.c +15 -0
- data/vendor/brotli/c/common/constants.h +149 -6
- data/vendor/brotli/c/{dec/context.h → common/context.c} +91 -186
- data/vendor/brotli/c/common/context.h +113 -0
- data/vendor/brotli/c/common/dictionary.bin +0 -0
- data/vendor/brotli/c/common/dictionary.bin.br +0 -0
- data/vendor/brotli/c/common/dictionary.c +11 -2
- data/vendor/brotli/c/common/dictionary.h +4 -4
- data/vendor/brotli/c/common/platform.c +22 -0
- data/vendor/brotli/c/common/platform.h +594 -0
- data/vendor/brotli/c/common/transform.c +291 -0
- data/vendor/brotli/c/common/transform.h +85 -0
- data/vendor/brotli/c/common/version.h +8 -1
- data/vendor/brotli/c/dec/bit_reader.c +29 -1
- data/vendor/brotli/c/dec/bit_reader.h +91 -100
- data/vendor/brotli/c/dec/decode.c +665 -437
- data/vendor/brotli/c/dec/huffman.c +65 -84
- data/vendor/brotli/c/dec/huffman.h +67 -14
- data/vendor/brotli/c/dec/prefix.h +1 -20
- data/vendor/brotli/c/dec/state.c +32 -45
- data/vendor/brotli/c/dec/state.h +173 -55
- data/vendor/brotli/c/enc/backward_references.c +27 -16
- data/vendor/brotli/c/enc/backward_references.h +7 -7
- data/vendor/brotli/c/enc/backward_references_hq.c +155 -116
- data/vendor/brotli/c/enc/backward_references_hq.h +22 -23
- data/vendor/brotli/c/enc/backward_references_inc.h +32 -22
- data/vendor/brotli/c/enc/bit_cost.c +1 -1
- data/vendor/brotli/c/enc/bit_cost.h +5 -5
- data/vendor/brotli/c/enc/block_encoder_inc.h +7 -6
- data/vendor/brotli/c/enc/block_splitter.c +5 -6
- data/vendor/brotli/c/enc/block_splitter.h +1 -1
- data/vendor/brotli/c/enc/block_splitter_inc.h +26 -17
- data/vendor/brotli/c/enc/brotli_bit_stream.c +107 -123
- data/vendor/brotli/c/enc/brotli_bit_stream.h +19 -38
- data/vendor/brotli/c/enc/cluster.c +1 -1
- data/vendor/brotli/c/enc/cluster.h +1 -1
- data/vendor/brotli/c/enc/cluster_inc.h +6 -3
- data/vendor/brotli/c/enc/command.c +28 -0
- data/vendor/brotli/c/enc/command.h +52 -42
- data/vendor/brotli/c/enc/compress_fragment.c +21 -22
- data/vendor/brotli/c/enc/compress_fragment.h +1 -1
- data/vendor/brotli/c/enc/compress_fragment_two_pass.c +102 -69
- data/vendor/brotli/c/enc/compress_fragment_two_pass.h +1 -1
- data/vendor/brotli/c/enc/dictionary_hash.c +1827 -1101
- data/vendor/brotli/c/enc/dictionary_hash.h +2 -1
- data/vendor/brotli/c/enc/encode.c +358 -195
- data/vendor/brotli/c/enc/encoder_dict.c +33 -0
- data/vendor/brotli/c/enc/encoder_dict.h +43 -0
- data/vendor/brotli/c/enc/entropy_encode.c +16 -14
- data/vendor/brotli/c/enc/entropy_encode.h +7 -7
- data/vendor/brotli/c/enc/entropy_encode_static.h +3 -3
- data/vendor/brotli/c/enc/fast_log.c +105 -0
- data/vendor/brotli/c/enc/fast_log.h +20 -99
- data/vendor/brotli/c/enc/find_match_length.h +5 -6
- data/vendor/brotli/c/enc/hash.h +145 -103
- data/vendor/brotli/c/enc/hash_composite_inc.h +125 -0
- data/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +93 -53
- data/vendor/brotli/c/enc/hash_longest_match64_inc.h +54 -53
- data/vendor/brotli/c/enc/hash_longest_match_inc.h +58 -54
- data/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +95 -63
- data/vendor/brotli/c/enc/hash_rolling_inc.h +212 -0
- data/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +46 -43
- data/vendor/brotli/c/enc/histogram.c +9 -6
- data/vendor/brotli/c/enc/histogram.h +6 -3
- data/vendor/brotli/c/enc/histogram_inc.h +1 -1
- data/vendor/brotli/c/enc/literal_cost.c +5 -5
- data/vendor/brotli/c/enc/literal_cost.h +2 -2
- data/vendor/brotli/c/enc/memory.c +5 -16
- data/vendor/brotli/c/enc/memory.h +52 -1
- data/vendor/brotli/c/enc/metablock.c +171 -36
- data/vendor/brotli/c/enc/metablock.h +13 -8
- data/vendor/brotli/c/enc/metablock_inc.h +2 -2
- data/vendor/brotli/c/enc/params.h +46 -0
- data/vendor/brotli/c/enc/prefix.h +3 -4
- data/vendor/brotli/c/enc/quality.h +29 -24
- data/vendor/brotli/c/enc/ringbuffer.h +19 -12
- data/vendor/brotli/c/enc/static_dict.c +49 -45
- data/vendor/brotli/c/enc/static_dict.h +4 -3
- data/vendor/brotli/c/enc/static_dict_lut.h +1 -1
- data/vendor/brotli/c/enc/utf8_util.c +21 -21
- data/vendor/brotli/c/enc/utf8_util.h +1 -1
- data/vendor/brotli/c/enc/write_bits.h +35 -38
- data/vendor/brotli/c/include/brotli/decode.h +13 -8
- data/vendor/brotli/c/include/brotli/encode.h +54 -8
- data/vendor/brotli/c/include/brotli/port.h +225 -83
- data/vendor/brotli/c/include/brotli/types.h +0 -7
- metadata +28 -87
- data/.travis.yml +0 -30
- data/spec/brotli_spec.rb +0 -88
- data/spec/inflate_spec.rb +0 -75
- data/spec/spec_helper.rb +0 -4
- data/vendor/brotli/c/dec/port.h +0 -168
- data/vendor/brotli/c/dec/transform.h +0 -300
- data/vendor/brotli/c/enc/context.h +0 -184
- data/vendor/brotli/c/enc/port.h +0 -184
@@ -1,115 +1,81 @@
|
|
1
|
-
|
1
|
+
#include "./context.h"
|
2
2
|
|
3
|
-
|
4
|
-
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
-
*/
|
6
|
-
|
7
|
-
/* Lookup table to map the previous two bytes to a context id.
|
8
|
-
|
9
|
-
There are four different context modeling modes defined here:
|
10
|
-
CONTEXT_LSB6: context id is the least significant 6 bits of the last byte,
|
11
|
-
CONTEXT_MSB6: context id is the most significant 6 bits of the last byte,
|
12
|
-
CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text,
|
13
|
-
CONTEXT_SIGNED: second-order context model tuned for signed integers.
|
14
|
-
|
15
|
-
The context id for the UTF8 context model is calculated as follows. If p1
|
16
|
-
and p2 are the previous two bytes, we calculate the context as
|
17
|
-
|
18
|
-
context = kContextLookup[p1] | kContextLookup[p2 + 256].
|
19
|
-
|
20
|
-
If the previous two bytes are ASCII characters (i.e. < 128), this will be
|
21
|
-
equivalent to
|
22
|
-
|
23
|
-
context = 4 * context1(p1) + context2(p2),
|
24
|
-
|
25
|
-
where context1 is based on the previous byte in the following way:
|
26
|
-
|
27
|
-
0 : non-ASCII control
|
28
|
-
1 : \t, \n, \r
|
29
|
-
2 : space
|
30
|
-
3 : other punctuation
|
31
|
-
4 : " '
|
32
|
-
5 : %
|
33
|
-
6 : ( < [ {
|
34
|
-
7 : ) > ] }
|
35
|
-
8 : , ; :
|
36
|
-
9 : .
|
37
|
-
10 : =
|
38
|
-
11 : number
|
39
|
-
12 : upper-case vowel
|
40
|
-
13 : upper-case consonant
|
41
|
-
14 : lower-case vowel
|
42
|
-
15 : lower-case consonant
|
43
|
-
|
44
|
-
and context2 is based on the second last byte:
|
45
|
-
|
46
|
-
0 : control, space
|
47
|
-
1 : punctuation
|
48
|
-
2 : upper-case letter, number
|
49
|
-
3 : lower-case letter
|
50
|
-
|
51
|
-
If the last byte is ASCII, and the second last byte is not (in a valid UTF8
|
52
|
-
stream it will be a continuation byte, value between 128 and 191), the
|
53
|
-
context is the same as if the second last byte was an ASCII control or space.
|
54
|
-
|
55
|
-
If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
|
56
|
-
be a continuation byte and the context id is 2 or 3 depending on the LSB of
|
57
|
-
the last byte and to a lesser extent on the second last byte if it is ASCII.
|
58
|
-
|
59
|
-
If the last byte is a UTF8 continuation byte, the second last byte can be:
|
60
|
-
- continuation byte: the next byte is probably ASCII or lead byte (assuming
|
61
|
-
4-byte UTF8 characters are rare) and the context id is 0 or 1.
|
62
|
-
- lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
|
63
|
-
- lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
|
64
|
-
|
65
|
-
The possible value combinations of the previous two bytes, the range of
|
66
|
-
context ids and the type of the next byte is summarized in the table below:
|
67
|
-
|
68
|
-
|--------\-----------------------------------------------------------------|
|
69
|
-
| \ Last byte |
|
70
|
-
| Second \---------------------------------------------------------------|
|
71
|
-
| last byte \ ASCII | cont. byte | lead byte |
|
72
|
-
| \ (0-127) | (128-191) | (192-) |
|
73
|
-
|=============|===================|=====================|==================|
|
74
|
-
| ASCII | next: ASCII/lead | not valid | next: cont. |
|
75
|
-
| (0-127) | context: 4 - 63 | | context: 2 - 3 |
|
76
|
-
|-------------|-------------------|---------------------|------------------|
|
77
|
-
| cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
|
78
|
-
| (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
|
79
|
-
|-------------|-------------------|---------------------|------------------|
|
80
|
-
| lead byte | not valid | next: ASCII/lead | not valid |
|
81
|
-
| (192-207) | | context: 0 - 1 | |
|
82
|
-
|-------------|-------------------|---------------------|------------------|
|
83
|
-
| lead byte | not valid | next: cont. | not valid |
|
84
|
-
| (208-) | | context: 2 - 3 | |
|
85
|
-
|-------------|-------------------|---------------------|------------------|
|
86
|
-
|
87
|
-
The context id for the signed context mode is calculated as:
|
88
|
-
|
89
|
-
context = (kContextLookup[512 + p1] << 3) | kContextLookup[512 + p2].
|
90
|
-
|
91
|
-
For any context modeling modes, the context ids can be calculated by |-ing
|
92
|
-
together two lookups from one table using context model dependent offsets:
|
93
|
-
|
94
|
-
context = kContextLookup[offset1 + p1] | kContextLookup[offset2 + p2].
|
3
|
+
#include <brotli/types.h>
|
95
4
|
|
96
|
-
|
97
|
-
|
5
|
+
/* Common context lookup table for all context modes. */
|
6
|
+
const uint8_t _kBrotliContextLookupTable[2048] = {
|
7
|
+
/* CONTEXT_LSB6, last byte. */
|
8
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
9
|
+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
10
|
+
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
11
|
+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
12
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
13
|
+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
14
|
+
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
15
|
+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
16
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
17
|
+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
18
|
+
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
19
|
+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
20
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
21
|
+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
22
|
+
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
23
|
+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
98
24
|
|
99
|
-
|
100
|
-
|
25
|
+
/* CONTEXT_LSB6, second last byte, */
|
26
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
27
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
28
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
29
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
30
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
31
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
32
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
33
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
34
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
35
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
36
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
37
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
38
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
39
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
40
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
41
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
101
42
|
|
102
|
-
|
43
|
+
/* CONTEXT_MSB6, last byte. */
|
44
|
+
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
|
45
|
+
4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
|
46
|
+
8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11,
|
47
|
+
12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
|
48
|
+
16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
|
49
|
+
20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
|
50
|
+
24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
|
51
|
+
28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
|
52
|
+
32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
|
53
|
+
36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
|
54
|
+
40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
|
55
|
+
44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
|
56
|
+
48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
|
57
|
+
52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
|
58
|
+
56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
|
59
|
+
60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,
|
103
60
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
61
|
+
/* CONTEXT_MSB6, second last byte, */
|
62
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
63
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
64
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
65
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
66
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
67
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
68
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
69
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
70
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
71
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
72
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
73
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
74
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
75
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
76
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
77
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
110
78
|
|
111
|
-
/* Common context lookup table for all context modes. */
|
112
|
-
static const uint8_t kContextLookup[1792] = {
|
113
79
|
/* CONTEXT_UTF8, last byte. */
|
114
80
|
/* ASCII range. */
|
115
81
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
|
@@ -130,6 +96,7 @@ static const uint8_t kContextLookup[1792] = {
|
|
130
96
|
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
131
97
|
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
132
98
|
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
99
|
+
|
133
100
|
/* CONTEXT_UTF8 second last byte. */
|
134
101
|
/* ASCII range. */
|
135
102
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
@@ -150,23 +117,7 @@ static const uint8_t kContextLookup[1792] = {
|
|
150
117
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
151
118
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
152
119
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
153
|
-
|
154
|
-
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
155
|
-
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
156
|
-
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
157
|
-
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
158
|
-
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
159
|
-
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
160
|
-
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
161
|
-
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
162
|
-
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
163
|
-
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
164
|
-
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
165
|
-
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
166
|
-
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
167
|
-
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
168
|
-
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
169
|
-
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
|
120
|
+
|
170
121
|
/* CONTEXT_SIGNED, last byte, same as the above values shifted by 3 bits. */
|
171
122
|
0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
172
123
|
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
@@ -184,68 +135,22 @@ static const uint8_t kContextLookup[1792] = {
|
|
184
135
|
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
|
185
136
|
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
|
186
137
|
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56,
|
187
|
-
/* CONTEXT_LSB6, last byte. */
|
188
|
-
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
189
|
-
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
190
|
-
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
191
|
-
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
192
|
-
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
193
|
-
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
194
|
-
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
195
|
-
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
196
|
-
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
197
|
-
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
198
|
-
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
199
|
-
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
200
|
-
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
201
|
-
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
202
|
-
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
203
|
-
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
204
|
-
/* CONTEXT_MSB6, last byte. */
|
205
|
-
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
|
206
|
-
4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
|
207
|
-
8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11,
|
208
|
-
12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
|
209
|
-
16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
|
210
|
-
20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
|
211
|
-
24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
|
212
|
-
28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
|
213
|
-
32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
|
214
|
-
36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
|
215
|
-
40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
|
216
|
-
44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
|
217
|
-
48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
|
218
|
-
52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
|
219
|
-
56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
|
220
|
-
60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,
|
221
|
-
/* CONTEXT_{M,L}SB6, second last byte, */
|
222
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
223
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
224
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
225
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
226
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
227
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
228
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
229
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
230
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
231
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
232
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
233
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
234
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
235
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
236
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
237
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
238
|
-
};
|
239
138
|
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
139
|
+
/* CONTEXT_SIGNED, second last byte. */
|
140
|
+
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
141
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
142
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
143
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
144
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
145
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
146
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
147
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
148
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
149
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
150
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
151
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
152
|
+
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
153
|
+
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
154
|
+
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
155
|
+
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
|
249
156
|
};
|
250
|
-
|
251
|
-
#endif /* BROTLI_DEC_CONTEXT_H_ */
|
@@ -0,0 +1,113 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Lookup table to map the previous two bytes to a context id.
|
8
|
+
|
9
|
+
There are four different context modeling modes defined here:
|
10
|
+
CONTEXT_LSB6: context id is the least significant 6 bits of the last byte,
|
11
|
+
CONTEXT_MSB6: context id is the most significant 6 bits of the last byte,
|
12
|
+
CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text,
|
13
|
+
CONTEXT_SIGNED: second-order context model tuned for signed integers.
|
14
|
+
|
15
|
+
If |p1| and |p2| are the previous two bytes, and |mode| is current context
|
16
|
+
mode, we calculate the context as:
|
17
|
+
|
18
|
+
context = ContextLut(mode)[p1] | ContextLut(mode)[p2 + 256].
|
19
|
+
|
20
|
+
For CONTEXT_UTF8 mode, if the previous two bytes are ASCII characters
|
21
|
+
(i.e. < 128), this will be equivalent to
|
22
|
+
|
23
|
+
context = 4 * context1(p1) + context2(p2),
|
24
|
+
|
25
|
+
where context1 is based on the previous byte in the following way:
|
26
|
+
|
27
|
+
0 : non-ASCII control
|
28
|
+
1 : \t, \n, \r
|
29
|
+
2 : space
|
30
|
+
3 : other punctuation
|
31
|
+
4 : " '
|
32
|
+
5 : %
|
33
|
+
6 : ( < [ {
|
34
|
+
7 : ) > ] }
|
35
|
+
8 : , ; :
|
36
|
+
9 : .
|
37
|
+
10 : =
|
38
|
+
11 : number
|
39
|
+
12 : upper-case vowel
|
40
|
+
13 : upper-case consonant
|
41
|
+
14 : lower-case vowel
|
42
|
+
15 : lower-case consonant
|
43
|
+
|
44
|
+
and context2 is based on the second last byte:
|
45
|
+
|
46
|
+
0 : control, space
|
47
|
+
1 : punctuation
|
48
|
+
2 : upper-case letter, number
|
49
|
+
3 : lower-case letter
|
50
|
+
|
51
|
+
If the last byte is ASCII, and the second last byte is not (in a valid UTF8
|
52
|
+
stream it will be a continuation byte, value between 128 and 191), the
|
53
|
+
context is the same as if the second last byte was an ASCII control or space.
|
54
|
+
|
55
|
+
If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
|
56
|
+
be a continuation byte and the context id is 2 or 3 depending on the LSB of
|
57
|
+
the last byte and to a lesser extent on the second last byte if it is ASCII.
|
58
|
+
|
59
|
+
If the last byte is a UTF8 continuation byte, the second last byte can be:
|
60
|
+
- continuation byte: the next byte is probably ASCII or lead byte (assuming
|
61
|
+
4-byte UTF8 characters are rare) and the context id is 0 or 1.
|
62
|
+
- lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
|
63
|
+
- lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
|
64
|
+
|
65
|
+
The possible value combinations of the previous two bytes, the range of
|
66
|
+
context ids and the type of the next byte is summarized in the table below:
|
67
|
+
|
68
|
+
|--------\-----------------------------------------------------------------|
|
69
|
+
| \ Last byte |
|
70
|
+
| Second \---------------------------------------------------------------|
|
71
|
+
| last byte \ ASCII | cont. byte | lead byte |
|
72
|
+
| \ (0-127) | (128-191) | (192-) |
|
73
|
+
|=============|===================|=====================|==================|
|
74
|
+
| ASCII | next: ASCII/lead | not valid | next: cont. |
|
75
|
+
| (0-127) | context: 4 - 63 | | context: 2 - 3 |
|
76
|
+
|-------------|-------------------|---------------------|------------------|
|
77
|
+
| cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
|
78
|
+
| (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
|
79
|
+
|-------------|-------------------|---------------------|------------------|
|
80
|
+
| lead byte | not valid | next: ASCII/lead | not valid |
|
81
|
+
| (192-207) | | context: 0 - 1 | |
|
82
|
+
|-------------|-------------------|---------------------|------------------|
|
83
|
+
| lead byte | not valid | next: cont. | not valid |
|
84
|
+
| (208-) | | context: 2 - 3 | |
|
85
|
+
|-------------|-------------------|---------------------|------------------|
|
86
|
+
*/
|
87
|
+
|
88
|
+
#ifndef BROTLI_COMMON_CONTEXT_H_
|
89
|
+
#define BROTLI_COMMON_CONTEXT_H_
|
90
|
+
|
91
|
+
#include <brotli/port.h>
|
92
|
+
#include <brotli/types.h>
|
93
|
+
|
94
|
+
typedef enum ContextType {
|
95
|
+
CONTEXT_LSB6 = 0,
|
96
|
+
CONTEXT_MSB6 = 1,
|
97
|
+
CONTEXT_UTF8 = 2,
|
98
|
+
CONTEXT_SIGNED = 3
|
99
|
+
} ContextType;
|
100
|
+
|
101
|
+
/* "Soft-private", it is exported, but not "advertised" as API. */
|
102
|
+
/* Common context lookup table for all context modes. */
|
103
|
+
BROTLI_COMMON_API extern const uint8_t _kBrotliContextLookupTable[2048];
|
104
|
+
|
105
|
+
typedef const uint8_t* ContextLut;
|
106
|
+
|
107
|
+
/* typeof(MODE) == ContextType; returns ContextLut */
|
108
|
+
#define BROTLI_CONTEXT_LUT(MODE) (&_kBrotliContextLookupTable[(MODE) << 9])
|
109
|
+
|
110
|
+
/* typeof(LUT) == ContextLut */
|
111
|
+
#define BROTLI_CONTEXT(P1, P2, LUT) ((LUT)[P1] | ((LUT) + 256)[P2])
|
112
|
+
|
113
|
+
#endif /* BROTLI_COMMON_CONTEXT_H_ */
|
File without changes
|
Binary file
|
@@ -5,12 +5,13 @@
|
|
5
5
|
*/
|
6
6
|
|
7
7
|
#include "./dictionary.h"
|
8
|
+
#include "./platform.h"
|
8
9
|
|
9
10
|
#if defined(__cplusplus) || defined(c_plusplus)
|
10
11
|
extern "C" {
|
11
12
|
#endif
|
12
13
|
|
13
|
-
#ifndef BROTLI_EXTERNAL_DICTIONARY_DATA
|
14
|
+
#if !defined(BROTLI_EXTERNAL_DICTIONARY_DATA)
|
14
15
|
static const uint8_t kBrotliDictionaryData[] =
|
15
16
|
{
|
16
17
|
116,105,109,101,100,111,119,110,108,105,102,101,108,101,102,116,98,97,99,107,99,
|
@@ -5862,7 +5863,11 @@ static const uint8_t kBrotliDictionaryData[] =
|
|
5862
5863
|
;
|
5863
5864
|
#endif /* !BROTLI_EXTERNAL_DICTIONARY_DATA */
|
5864
5865
|
|
5866
|
+
#if !defined(BROTLI_EXTERNAL_DICTIONARY_DATA)
|
5867
|
+
static const BrotliDictionary kBrotliDictionary = {
|
5868
|
+
#else
|
5865
5869
|
static BrotliDictionary kBrotliDictionary = {
|
5870
|
+
#endif
|
5866
5871
|
/* size_bits_by_length */
|
5867
5872
|
{
|
5868
5873
|
0, 0, 0, 0, 10, 10, 11, 11,
|
@@ -5883,7 +5888,7 @@ static BrotliDictionary kBrotliDictionary = {
|
|
5883
5888
|
122784,
|
5884
5889
|
|
5885
5890
|
/* data */
|
5886
|
-
#ifdef BROTLI_EXTERNAL_DICTIONARY_DATA
|
5891
|
+
#if defined(BROTLI_EXTERNAL_DICTIONARY_DATA)
|
5887
5892
|
NULL
|
5888
5893
|
#else
|
5889
5894
|
kBrotliDictionaryData
|
@@ -5895,9 +5900,13 @@ const BrotliDictionary* BrotliGetDictionary() {
|
|
5895
5900
|
}
|
5896
5901
|
|
5897
5902
|
void BrotliSetDictionaryData(const uint8_t* data) {
|
5903
|
+
#if defined(BROTLI_EXTERNAL_DICTIONARY_DATA)
|
5898
5904
|
if (!!data && !kBrotliDictionary.data) {
|
5899
5905
|
kBrotliDictionary.data = data;
|
5900
5906
|
}
|
5907
|
+
#else
|
5908
|
+
BROTLI_UNUSED(data); // Appease -Werror=unused-parameter
|
5909
|
+
#endif
|
5901
5910
|
}
|
5902
5911
|
|
5903
5912
|
#if defined(__cplusplus) || defined(c_plusplus)
|