brotli 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/main.yml +34 -0
- data/.github/workflows/publish.yml +34 -0
- data/Gemfile +6 -2
- data/Rakefile +18 -6
- data/bin/before_install.sh +9 -0
- data/brotli.gemspec +7 -13
- data/ext/brotli/brotli.c +209 -11
- data/ext/brotli/buffer.c +1 -7
- data/ext/brotli/buffer.h +1 -1
- data/ext/brotli/extconf.rb +45 -26
- data/lib/brotli/version.rb +1 -1
- data/smoke.sh +1 -1
- data/test/brotli_test.rb +104 -0
- data/test/brotli_writer_test.rb +36 -0
- data/test/test_helper.rb +8 -0
- data/vendor/brotli/c/common/constants.c +15 -0
- data/vendor/brotli/c/common/constants.h +149 -6
- data/vendor/brotli/c/{dec/context.h → common/context.c} +91 -186
- data/vendor/brotli/c/common/context.h +113 -0
- data/vendor/brotli/c/common/dictionary.bin +0 -0
- data/vendor/brotli/c/common/dictionary.bin.br +0 -0
- data/vendor/brotli/c/common/dictionary.c +11 -2
- data/vendor/brotli/c/common/dictionary.h +4 -4
- data/vendor/brotli/c/common/platform.c +22 -0
- data/vendor/brotli/c/common/platform.h +594 -0
- data/vendor/brotli/c/common/transform.c +291 -0
- data/vendor/brotli/c/common/transform.h +85 -0
- data/vendor/brotli/c/common/version.h +8 -1
- data/vendor/brotli/c/dec/bit_reader.c +29 -1
- data/vendor/brotli/c/dec/bit_reader.h +91 -100
- data/vendor/brotli/c/dec/decode.c +665 -437
- data/vendor/brotli/c/dec/huffman.c +65 -84
- data/vendor/brotli/c/dec/huffman.h +67 -14
- data/vendor/brotli/c/dec/prefix.h +1 -20
- data/vendor/brotli/c/dec/state.c +32 -45
- data/vendor/brotli/c/dec/state.h +173 -55
- data/vendor/brotli/c/enc/backward_references.c +27 -16
- data/vendor/brotli/c/enc/backward_references.h +7 -7
- data/vendor/brotli/c/enc/backward_references_hq.c +155 -116
- data/vendor/brotli/c/enc/backward_references_hq.h +22 -23
- data/vendor/brotli/c/enc/backward_references_inc.h +32 -22
- data/vendor/brotli/c/enc/bit_cost.c +1 -1
- data/vendor/brotli/c/enc/bit_cost.h +5 -5
- data/vendor/brotli/c/enc/block_encoder_inc.h +7 -6
- data/vendor/brotli/c/enc/block_splitter.c +5 -6
- data/vendor/brotli/c/enc/block_splitter.h +1 -1
- data/vendor/brotli/c/enc/block_splitter_inc.h +26 -17
- data/vendor/brotli/c/enc/brotli_bit_stream.c +107 -123
- data/vendor/brotli/c/enc/brotli_bit_stream.h +19 -38
- data/vendor/brotli/c/enc/cluster.c +1 -1
- data/vendor/brotli/c/enc/cluster.h +1 -1
- data/vendor/brotli/c/enc/cluster_inc.h +6 -3
- data/vendor/brotli/c/enc/command.c +28 -0
- data/vendor/brotli/c/enc/command.h +52 -42
- data/vendor/brotli/c/enc/compress_fragment.c +21 -22
- data/vendor/brotli/c/enc/compress_fragment.h +1 -1
- data/vendor/brotli/c/enc/compress_fragment_two_pass.c +102 -69
- data/vendor/brotli/c/enc/compress_fragment_two_pass.h +1 -1
- data/vendor/brotli/c/enc/dictionary_hash.c +1827 -1101
- data/vendor/brotli/c/enc/dictionary_hash.h +2 -1
- data/vendor/brotli/c/enc/encode.c +358 -195
- data/vendor/brotli/c/enc/encoder_dict.c +33 -0
- data/vendor/brotli/c/enc/encoder_dict.h +43 -0
- data/vendor/brotli/c/enc/entropy_encode.c +16 -14
- data/vendor/brotli/c/enc/entropy_encode.h +7 -7
- data/vendor/brotli/c/enc/entropy_encode_static.h +3 -3
- data/vendor/brotli/c/enc/fast_log.c +105 -0
- data/vendor/brotli/c/enc/fast_log.h +20 -99
- data/vendor/brotli/c/enc/find_match_length.h +5 -6
- data/vendor/brotli/c/enc/hash.h +145 -103
- data/vendor/brotli/c/enc/hash_composite_inc.h +125 -0
- data/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +93 -53
- data/vendor/brotli/c/enc/hash_longest_match64_inc.h +54 -53
- data/vendor/brotli/c/enc/hash_longest_match_inc.h +58 -54
- data/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +95 -63
- data/vendor/brotli/c/enc/hash_rolling_inc.h +212 -0
- data/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +46 -43
- data/vendor/brotli/c/enc/histogram.c +9 -6
- data/vendor/brotli/c/enc/histogram.h +6 -3
- data/vendor/brotli/c/enc/histogram_inc.h +1 -1
- data/vendor/brotli/c/enc/literal_cost.c +5 -5
- data/vendor/brotli/c/enc/literal_cost.h +2 -2
- data/vendor/brotli/c/enc/memory.c +5 -16
- data/vendor/brotli/c/enc/memory.h +52 -1
- data/vendor/brotli/c/enc/metablock.c +171 -36
- data/vendor/brotli/c/enc/metablock.h +13 -8
- data/vendor/brotli/c/enc/metablock_inc.h +2 -2
- data/vendor/brotli/c/enc/params.h +46 -0
- data/vendor/brotli/c/enc/prefix.h +3 -4
- data/vendor/brotli/c/enc/quality.h +29 -24
- data/vendor/brotli/c/enc/ringbuffer.h +19 -12
- data/vendor/brotli/c/enc/static_dict.c +49 -45
- data/vendor/brotli/c/enc/static_dict.h +4 -3
- data/vendor/brotli/c/enc/static_dict_lut.h +1 -1
- data/vendor/brotli/c/enc/utf8_util.c +21 -21
- data/vendor/brotli/c/enc/utf8_util.h +1 -1
- data/vendor/brotli/c/enc/write_bits.h +35 -38
- data/vendor/brotli/c/include/brotli/decode.h +13 -8
- data/vendor/brotli/c/include/brotli/encode.h +54 -8
- data/vendor/brotli/c/include/brotli/port.h +225 -83
- data/vendor/brotli/c/include/brotli/types.h +0 -7
- metadata +28 -87
- data/.travis.yml +0 -30
- data/spec/brotli_spec.rb +0 -88
- data/spec/inflate_spec.rb +0 -75
- data/spec/spec_helper.rb +0 -4
- data/vendor/brotli/c/dec/port.h +0 -168
- data/vendor/brotli/c/dec/transform.h +0 -300
- data/vendor/brotli/c/enc/context.h +0 -184
- data/vendor/brotli/c/enc/port.h +0 -184
@@ -11,7 +11,7 @@
|
|
11
11
|
#define BROTLI_ENC_PREFIX_H_
|
12
12
|
|
13
13
|
#include "../common/constants.h"
|
14
|
-
#include
|
14
|
+
#include "../common/platform.h"
|
15
15
|
#include <brotli/types.h>
|
16
16
|
#include "./fast_log.h"
|
17
17
|
|
@@ -39,11 +39,10 @@ static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
|
|
39
39
|
size_t prefix = (dist >> bucket) & 1;
|
40
40
|
size_t offset = (2 + prefix) << bucket;
|
41
41
|
size_t nbits = bucket - postfix_bits;
|
42
|
-
*code = (uint16_t)(
|
42
|
+
*code = (uint16_t)((nbits << 10) |
|
43
43
|
(BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes +
|
44
44
|
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
|
45
|
-
*extra_bits = (uint32_t)(
|
46
|
-
(nbits << 24) | ((dist - offset) >> postfix_bits));
|
45
|
+
*extra_bits = (uint32_t)((dist - offset) >> postfix_bits);
|
47
46
|
}
|
48
47
|
}
|
49
48
|
|
@@ -10,7 +10,9 @@
|
|
10
10
|
#ifndef BROTLI_ENC_QUALITY_H_
|
11
11
|
#define BROTLI_ENC_QUALITY_H_
|
12
12
|
|
13
|
+
#include "../common/platform.h"
|
13
14
|
#include <brotli/encode.h>
|
15
|
+
#include "./params.h"
|
14
16
|
|
15
17
|
#define FAST_ONE_PASS_COMPRESSION_QUALITY 0
|
16
18
|
#define FAST_TWO_PASS_COMPRESSION_QUALITY 1
|
@@ -19,36 +21,16 @@
|
|
19
21
|
|
20
22
|
#define MAX_QUALITY_FOR_STATIC_ENTROPY_CODES 2
|
21
23
|
#define MIN_QUALITY_FOR_BLOCK_SPLIT 4
|
24
|
+
#define MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS 4
|
22
25
|
#define MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS 4
|
23
26
|
#define MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH 5
|
24
27
|
#define MIN_QUALITY_FOR_CONTEXT_MODELING 5
|
25
28
|
#define MIN_QUALITY_FOR_HQ_CONTEXT_MODELING 7
|
26
29
|
#define MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING 10
|
27
|
-
/* Only for "font" mode. */
|
28
|
-
#define MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES 10
|
29
30
|
|
30
31
|
/* For quality below MIN_QUALITY_FOR_BLOCK_SPLIT there is no block splitting,
|
31
32
|
so we buffer at most this much literals and commands. */
|
32
|
-
#define MAX_NUM_DELAYED_SYMBOLS
|
33
|
-
|
34
|
-
typedef struct BrotliHasherParams {
|
35
|
-
int type;
|
36
|
-
int bucket_bits;
|
37
|
-
int block_bits;
|
38
|
-
int hash_len;
|
39
|
-
int num_last_distances_to_check;
|
40
|
-
} BrotliHasherParams;
|
41
|
-
|
42
|
-
/* Encoding parameters */
|
43
|
-
typedef struct BrotliEncoderParams {
|
44
|
-
BrotliEncoderMode mode;
|
45
|
-
int quality;
|
46
|
-
int lgwin;
|
47
|
-
int lgblock;
|
48
|
-
size_t size_hint;
|
49
|
-
BROTLI_BOOL disable_literal_context_modeling;
|
50
|
-
BrotliHasherParams hasher;
|
51
|
-
} BrotliEncoderParams;
|
33
|
+
#define MAX_NUM_DELAYED_SYMBOLS 0x2FFF
|
52
34
|
|
53
35
|
/* Returns hash-table size for quality levels 0 and 1. */
|
54
36
|
static BROTLI_INLINE size_t MaxHashTableSize(int quality) {
|
@@ -77,10 +59,15 @@ static BROTLI_INLINE size_t MaxZopfliCandidates(
|
|
77
59
|
static BROTLI_INLINE void SanitizeParams(BrotliEncoderParams* params) {
|
78
60
|
params->quality = BROTLI_MIN(int, BROTLI_MAX_QUALITY,
|
79
61
|
BROTLI_MAX(int, BROTLI_MIN_QUALITY, params->quality));
|
62
|
+
if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
|
63
|
+
params->large_window = BROTLI_FALSE;
|
64
|
+
}
|
80
65
|
if (params->lgwin < BROTLI_MIN_WINDOW_BITS) {
|
81
66
|
params->lgwin = BROTLI_MIN_WINDOW_BITS;
|
82
|
-
} else
|
83
|
-
params->
|
67
|
+
} else {
|
68
|
+
int max_lgwin = params->large_window ? BROTLI_LARGE_MAX_WINDOW_BITS :
|
69
|
+
BROTLI_MAX_WINDOW_BITS;
|
70
|
+
if (params->lgwin > max_lgwin) params->lgwin = max_lgwin;
|
84
71
|
}
|
85
72
|
}
|
86
73
|
|
@@ -155,6 +142,24 @@ static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params,
|
|
155
142
|
hparams->num_last_distances_to_check =
|
156
143
|
params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
|
157
144
|
}
|
145
|
+
|
146
|
+
if (params->lgwin > 24) {
|
147
|
+
/* Different hashers for large window brotli: not for qualities <= 2,
|
148
|
+
these are too fast for large window. Not for qualities >= 10: their
|
149
|
+
hasher already works well with large window. So the changes are:
|
150
|
+
H3 --> H35: for quality 3.
|
151
|
+
H54 --> H55: for quality 4 with size hint > 1MB
|
152
|
+
H6 --> H65: for qualities 5, 6, 7, 8, 9. */
|
153
|
+
if (hparams->type == 3) {
|
154
|
+
hparams->type = 35;
|
155
|
+
}
|
156
|
+
if (hparams->type == 54) {
|
157
|
+
hparams->type = 55;
|
158
|
+
}
|
159
|
+
if (hparams->type == 6) {
|
160
|
+
hparams->type = 65;
|
161
|
+
}
|
162
|
+
}
|
158
163
|
}
|
159
164
|
|
160
165
|
#endif /* BROTLI_ENC_QUALITY_H_ */
|
@@ -11,9 +11,9 @@
|
|
11
11
|
|
12
12
|
#include <string.h> /* memcpy */
|
13
13
|
|
14
|
+
#include "../common/platform.h"
|
14
15
|
#include <brotli/types.h>
|
15
16
|
#include "./memory.h"
|
16
|
-
#include "./port.h"
|
17
17
|
#include "./quality.h"
|
18
18
|
|
19
19
|
#if defined(__cplusplus) || defined(c_plusplus)
|
@@ -41,9 +41,9 @@ typedef struct RingBuffer {
|
|
41
41
|
uint32_t pos_;
|
42
42
|
/* The actual ring buffer containing the copy of the last two bytes, the data,
|
43
43
|
and the copy of the beginning as a tail. */
|
44
|
-
uint8_t
|
44
|
+
uint8_t* data_;
|
45
45
|
/* The start of the ring-buffer. */
|
46
|
-
uint8_t
|
46
|
+
uint8_t* buffer_;
|
47
47
|
} RingBuffer;
|
48
48
|
|
49
49
|
static BROTLI_INLINE void RingBufferInit(RingBuffer* rb) {
|
@@ -75,7 +75,7 @@ static BROTLI_INLINE void RingBufferInitBuffer(
|
|
75
75
|
uint8_t* new_data = BROTLI_ALLOC(
|
76
76
|
m, uint8_t, 2 + buflen + kSlackForEightByteHashingEverywhere);
|
77
77
|
size_t i;
|
78
|
-
if (BROTLI_IS_OOM(m)) return;
|
78
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_data)) return;
|
79
79
|
if (rb->data_) {
|
80
80
|
memcpy(new_data, rb->data_,
|
81
81
|
2 + rb->cur_size_ + kSlackForEightByteHashingEverywhere);
|
@@ -91,7 +91,7 @@ static BROTLI_INLINE void RingBufferInitBuffer(
|
|
91
91
|
}
|
92
92
|
|
93
93
|
static BROTLI_INLINE void RingBufferWriteTail(
|
94
|
-
const uint8_t
|
94
|
+
const uint8_t* bytes, size_t n, RingBuffer* rb) {
|
95
95
|
const size_t masked_pos = rb->pos_ & rb->mask_;
|
96
96
|
if (BROTLI_PREDICT_FALSE(masked_pos < rb->tail_size_)) {
|
97
97
|
/* Just fill the tail buffer with the beginning data. */
|
@@ -103,7 +103,7 @@ static BROTLI_INLINE void RingBufferWriteTail(
|
|
103
103
|
|
104
104
|
/* Push bytes into the ring buffer. */
|
105
105
|
static BROTLI_INLINE void RingBufferWrite(
|
106
|
-
MemoryManager* m, const uint8_t
|
106
|
+
MemoryManager* m, const uint8_t* bytes, size_t n, RingBuffer* rb) {
|
107
107
|
if (rb->pos_ == 0 && n < rb->tail_size_) {
|
108
108
|
/* Special case for the first write: to process the first block, we don't
|
109
109
|
need to allocate the whole ring-buffer and we don't need the tail
|
@@ -125,6 +125,9 @@ static BROTLI_INLINE void RingBufferWrite(
|
|
125
125
|
later when we copy the last two bytes to the first two positions. */
|
126
126
|
rb->buffer_[rb->size_ - 2] = 0;
|
127
127
|
rb->buffer_[rb->size_ - 1] = 0;
|
128
|
+
/* Initialize tail; might be touched by "best_len++" optimization when
|
129
|
+
ring buffer is "full". */
|
130
|
+
rb->buffer_[rb->size_] = 241;
|
128
131
|
}
|
129
132
|
{
|
130
133
|
const size_t masked_pos = rb->pos_ & rb->mask_;
|
@@ -144,12 +147,16 @@ static BROTLI_INLINE void RingBufferWrite(
|
|
144
147
|
n - (rb->size_ - masked_pos));
|
145
148
|
}
|
146
149
|
}
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
rb->pos_ = (rb->pos_ &
|
150
|
+
{
|
151
|
+
BROTLI_BOOL not_first_lap = (rb->pos_ & (1u << 31)) != 0;
|
152
|
+
uint32_t rb_pos_mask = (1u << 31) - 1;
|
153
|
+
rb->buffer_[-2] = rb->buffer_[rb->size_ - 2];
|
154
|
+
rb->buffer_[-1] = rb->buffer_[rb->size_ - 1];
|
155
|
+
rb->pos_ = (rb->pos_ & rb_pos_mask) + (uint32_t)(n & rb_pos_mask);
|
156
|
+
if (not_first_lap) {
|
157
|
+
/* Wrap, but preserve not-a-first-lap feature. */
|
158
|
+
rb->pos_ |= 1u << 31;
|
159
|
+
}
|
153
160
|
}
|
154
161
|
}
|
155
162
|
|
@@ -7,21 +7,17 @@
|
|
7
7
|
#include "./static_dict.h"
|
8
8
|
|
9
9
|
#include "../common/dictionary.h"
|
10
|
+
#include "../common/platform.h"
|
11
|
+
#include "../common/transform.h"
|
12
|
+
#include "./encoder_dict.h"
|
10
13
|
#include "./find_match_length.h"
|
11
|
-
#include "./port.h"
|
12
|
-
#include "./static_dict_lut.h"
|
13
14
|
|
14
15
|
#if defined(__cplusplus) || defined(c_plusplus)
|
15
16
|
extern "C" {
|
16
17
|
#endif
|
17
18
|
|
18
|
-
static const uint8_t
|
19
|
-
|
20
|
-
0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
|
21
|
-
};
|
22
|
-
|
23
|
-
static BROTLI_INLINE uint32_t Hash(const uint8_t *data) {
|
24
|
-
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
|
19
|
+
static BROTLI_INLINE uint32_t Hash(const uint8_t* data) {
|
20
|
+
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32;
|
25
21
|
/* The higher bits contain more mixture from the multiplication,
|
26
22
|
so we take our results from there. */
|
27
23
|
return h >> (32 - kDictNumBits);
|
@@ -79,32 +75,33 @@ static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
|
|
79
75
|
}
|
80
76
|
|
81
77
|
BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
82
|
-
const
|
83
|
-
size_t max_length, uint32_t* matches) {
|
78
|
+
const BrotliEncoderDictionary* dictionary, const uint8_t* data,
|
79
|
+
size_t min_length, size_t max_length, uint32_t* matches) {
|
84
80
|
BROTLI_BOOL has_found_match = BROTLI_FALSE;
|
85
81
|
{
|
86
|
-
size_t offset =
|
82
|
+
size_t offset = dictionary->buckets[Hash(data)];
|
87
83
|
BROTLI_BOOL end = !offset;
|
88
84
|
while (!end) {
|
89
|
-
DictWord w =
|
85
|
+
DictWord w = dictionary->dict_words[offset++];
|
90
86
|
const size_t l = w.len & 0x1F;
|
91
|
-
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
87
|
+
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
92
88
|
const size_t id = w.idx;
|
93
89
|
end = !!(w.len & 0x80);
|
94
90
|
w.len = (uint8_t)l;
|
95
91
|
if (w.transform == 0) {
|
96
92
|
const size_t matchlen =
|
97
|
-
DictMatchLength(dictionary, data, id, l, max_length);
|
93
|
+
DictMatchLength(dictionary->words, data, id, l, max_length);
|
98
94
|
const uint8_t* s;
|
99
95
|
size_t minlen;
|
100
96
|
size_t maxlen;
|
101
97
|
size_t len;
|
102
|
-
/* Transform "" +
|
98
|
+
/* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
|
103
99
|
if (matchlen == l) {
|
104
100
|
AddMatch(id, l, l, matches);
|
105
101
|
has_found_match = BROTLI_TRUE;
|
106
102
|
}
|
107
|
-
/* Transforms "" +
|
103
|
+
/* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
|
104
|
+
"" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
|
108
105
|
if (matchlen >= l - 1) {
|
109
106
|
AddMatch(id + 12 * n, l - 1, l, matches);
|
110
107
|
if (l + 2 < max_length &&
|
@@ -114,19 +111,22 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
114
111
|
}
|
115
112
|
has_found_match = BROTLI_TRUE;
|
116
113
|
}
|
117
|
-
/* Transform "" +
|
114
|
+
/* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
|
118
115
|
minlen = min_length;
|
119
116
|
if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
|
120
117
|
maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
|
121
118
|
for (len = minlen; len <= maxlen; ++len) {
|
122
|
-
|
119
|
+
size_t cut = l - len;
|
120
|
+
size_t transform_id = (cut << 2) +
|
121
|
+
(size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
|
122
|
+
AddMatch(id + transform_id * n, len, l, matches);
|
123
123
|
has_found_match = BROTLI_TRUE;
|
124
124
|
}
|
125
125
|
if (matchlen < l || l + 6 >= max_length) {
|
126
126
|
continue;
|
127
127
|
}
|
128
128
|
s = &data[l];
|
129
|
-
/* Transforms "" +
|
129
|
+
/* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
|
130
130
|
if (s[0] == ' ') {
|
131
131
|
AddMatch(id + n, l + 1, l, matches);
|
132
132
|
if (s[1] == 'a') {
|
@@ -273,12 +273,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
273
273
|
}
|
274
274
|
}
|
275
275
|
} else {
|
276
|
-
/* Set is_all_caps=0 for
|
277
|
-
is_all_caps=1 otherwise (
|
276
|
+
/* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
|
277
|
+
is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
|
278
|
+
transform. */
|
278
279
|
const BROTLI_BOOL is_all_caps =
|
279
|
-
TO_BROTLI_BOOL(w.transform !=
|
280
|
+
TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
|
280
281
|
const uint8_t* s;
|
281
|
-
if (!IsMatch(dictionary, w, data, max_length)) {
|
282
|
+
if (!IsMatch(dictionary->words, w, data, max_length)) {
|
282
283
|
continue;
|
283
284
|
}
|
284
285
|
/* Transform "" + kUppercase{First,All} + "" */
|
@@ -323,27 +324,29 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
323
324
|
/* Transforms with prefixes " " and "." */
|
324
325
|
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
|
325
326
|
BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
|
326
|
-
size_t offset =
|
327
|
+
size_t offset = dictionary->buckets[Hash(&data[1])];
|
327
328
|
BROTLI_BOOL end = !offset;
|
328
329
|
while (!end) {
|
329
|
-
DictWord w =
|
330
|
+
DictWord w = dictionary->dict_words[offset++];
|
330
331
|
const size_t l = w.len & 0x1F;
|
331
|
-
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
332
|
+
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
332
333
|
const size_t id = w.idx;
|
333
334
|
end = !!(w.len & 0x80);
|
334
335
|
w.len = (uint8_t)l;
|
335
336
|
if (w.transform == 0) {
|
336
337
|
const uint8_t* s;
|
337
|
-
if (!IsMatch(dictionary, w, &data[1], max_length - 1)) {
|
338
|
+
if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
|
338
339
|
continue;
|
339
340
|
}
|
340
|
-
/* Transforms " " +
|
341
|
+
/* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
|
342
|
+
"." + BROTLI_TRANSFORM_IDENTITY + "" */
|
341
343
|
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
|
342
344
|
has_found_match = BROTLI_TRUE;
|
343
345
|
if (l + 2 >= max_length) {
|
344
346
|
continue;
|
345
347
|
}
|
346
|
-
/* Transforms " " +
|
348
|
+
/* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
|
349
|
+
"." + BROTLI_TRANSFORM_IDENTITY + <suffix>
|
347
350
|
*/
|
348
351
|
s = &data[l + 1];
|
349
352
|
if (s[0] == ' ') {
|
@@ -370,12 +373,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
370
373
|
}
|
371
374
|
}
|
372
375
|
} else if (is_space) {
|
373
|
-
/* Set is_all_caps=0 for
|
374
|
-
is_all_caps=1 otherwise (
|
376
|
+
/* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
|
377
|
+
is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
|
378
|
+
transform. */
|
375
379
|
const BROTLI_BOOL is_all_caps =
|
376
|
-
TO_BROTLI_BOOL(w.transform !=
|
380
|
+
TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
|
377
381
|
const uint8_t* s;
|
378
|
-
if (!IsMatch(dictionary, w, &data[1], max_length - 1)) {
|
382
|
+
if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
|
379
383
|
continue;
|
380
384
|
}
|
381
385
|
/* Transforms " " + kUppercase{First,All} + "" */
|
@@ -411,22 +415,22 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
411
415
|
}
|
412
416
|
}
|
413
417
|
if (max_length >= 6) {
|
414
|
-
/* Transforms with prefixes "e ", "s ", ", " and "\
|
418
|
+
/* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
|
415
419
|
if ((data[1] == ' ' &&
|
416
420
|
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
|
417
|
-
(data[0] ==
|
418
|
-
size_t offset =
|
421
|
+
(data[0] == 0xC2 && data[1] == 0xA0)) {
|
422
|
+
size_t offset = dictionary->buckets[Hash(&data[2])];
|
419
423
|
BROTLI_BOOL end = !offset;
|
420
424
|
while (!end) {
|
421
|
-
DictWord w =
|
425
|
+
DictWord w = dictionary->dict_words[offset++];
|
422
426
|
const size_t l = w.len & 0x1F;
|
423
|
-
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
427
|
+
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
424
428
|
const size_t id = w.idx;
|
425
429
|
end = !!(w.len & 0x80);
|
426
430
|
w.len = (uint8_t)l;
|
427
431
|
if (w.transform == 0 &&
|
428
|
-
IsMatch(dictionary, w, &data[2], max_length - 2)) {
|
429
|
-
if (data[0] ==
|
432
|
+
IsMatch(dictionary->words, w, &data[2], max_length - 2)) {
|
433
|
+
if (data[0] == 0xC2) {
|
430
434
|
AddMatch(id + 102 * n, l + 2, l, matches);
|
431
435
|
has_found_match = BROTLI_TRUE;
|
432
436
|
} else if (l + 2 < max_length && data[l + 2] == ' ') {
|
@@ -444,17 +448,17 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
444
448
|
data[3] == 'e' && data[4] == ' ') ||
|
445
449
|
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
|
446
450
|
data[3] == 'm' && data[4] == '/')) {
|
447
|
-
size_t offset =
|
451
|
+
size_t offset = dictionary->buckets[Hash(&data[5])];
|
448
452
|
BROTLI_BOOL end = !offset;
|
449
453
|
while (!end) {
|
450
|
-
DictWord w =
|
454
|
+
DictWord w = dictionary->dict_words[offset++];
|
451
455
|
const size_t l = w.len & 0x1F;
|
452
|
-
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
456
|
+
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
453
457
|
const size_t id = w.idx;
|
454
458
|
end = !!(w.len & 0x80);
|
455
459
|
w.len = (uint8_t)l;
|
456
460
|
if (w.transform == 0 &&
|
457
|
-
IsMatch(dictionary, w, &data[5], max_length - 5)) {
|
461
|
+
IsMatch(dictionary->words, w, &data[5], max_length - 5)) {
|
458
462
|
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
|
459
463
|
has_found_match = BROTLI_TRUE;
|
460
464
|
if (l + 5 < max_length) {
|
@@ -10,15 +10,16 @@
|
|
10
10
|
#define BROTLI_ENC_STATIC_DICT_H_
|
11
11
|
|
12
12
|
#include "../common/dictionary.h"
|
13
|
+
#include "../common/platform.h"
|
13
14
|
#include <brotli/types.h>
|
14
|
-
#include "./
|
15
|
+
#include "./encoder_dict.h"
|
15
16
|
|
16
17
|
#if defined(__cplusplus) || defined(c_plusplus)
|
17
18
|
extern "C" {
|
18
19
|
#endif
|
19
20
|
|
20
21
|
#define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37
|
21
|
-
static const uint32_t kInvalidMatch =
|
22
|
+
static const uint32_t kInvalidMatch = 0xFFFFFFF;
|
22
23
|
|
23
24
|
/* Matches data against static dictionary words, and for each length l,
|
24
25
|
for which a match is found, updates matches[l] to be the minimum possible
|
@@ -28,7 +29,7 @@ static const uint32_t kInvalidMatch = 0xfffffff;
|
|
28
29
|
matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long
|
29
30
|
all elements are initialized to kInvalidMatch */
|
30
31
|
BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
31
|
-
const
|
32
|
+
const BrotliEncoderDictionary* dictionary,
|
32
33
|
const uint8_t* data, size_t min_length, size_t max_length,
|
33
34
|
uint32_t* matches);
|
34
35
|
|
@@ -23,7 +23,7 @@ typedef struct DictWord {
|
|
23
23
|
} DictWord;
|
24
24
|
|
25
25
|
static const int kDictNumBits = 15;
|
26
|
-
static const uint32_t kDictHashMul32 =
|
26
|
+
static const uint32_t kDictHashMul32 = 0x1E35A7BD;
|
27
27
|
|
28
28
|
static const uint16_t kStaticDictionaryBuckets[32768] = {
|
29
29
|
1,0,0,0,0,0,0,0,0,3,6,0,0,0,0,0,20,0,0,0,21,0,22,0,0,0,0,0,0,0,0,23,0,0,25,0,29,
|
@@ -25,37 +25,37 @@ static size_t BrotliParseAsUTF8(
|
|
25
25
|
}
|
26
26
|
/* 2-byte UTF8 */
|
27
27
|
if (size > 1u &&
|
28
|
-
(input[0] &
|
29
|
-
(input[1] &
|
30
|
-
*symbol = (((input[0] &
|
31
|
-
(input[1] &
|
32
|
-
if (*symbol >
|
28
|
+
(input[0] & 0xE0) == 0xC0 &&
|
29
|
+
(input[1] & 0xC0) == 0x80) {
|
30
|
+
*symbol = (((input[0] & 0x1F) << 6) |
|
31
|
+
(input[1] & 0x3F));
|
32
|
+
if (*symbol > 0x7F) {
|
33
33
|
return 2;
|
34
34
|
}
|
35
35
|
}
|
36
36
|
/* 3-byte UFT8 */
|
37
37
|
if (size > 2u &&
|
38
|
-
(input[0] &
|
39
|
-
(input[1] &
|
40
|
-
(input[2] &
|
41
|
-
*symbol = (((input[0] &
|
42
|
-
((input[1] &
|
43
|
-
(input[2] &
|
44
|
-
if (*symbol >
|
38
|
+
(input[0] & 0xF0) == 0xE0 &&
|
39
|
+
(input[1] & 0xC0) == 0x80 &&
|
40
|
+
(input[2] & 0xC0) == 0x80) {
|
41
|
+
*symbol = (((input[0] & 0x0F) << 12) |
|
42
|
+
((input[1] & 0x3F) << 6) |
|
43
|
+
(input[2] & 0x3F));
|
44
|
+
if (*symbol > 0x7FF) {
|
45
45
|
return 3;
|
46
46
|
}
|
47
47
|
}
|
48
48
|
/* 4-byte UFT8 */
|
49
49
|
if (size > 3u &&
|
50
|
-
(input[0] &
|
51
|
-
(input[1] &
|
52
|
-
(input[2] &
|
53
|
-
(input[3] &
|
50
|
+
(input[0] & 0xF8) == 0xF0 &&
|
51
|
+
(input[1] & 0xC0) == 0x80 &&
|
52
|
+
(input[2] & 0xC0) == 0x80 &&
|
53
|
+
(input[3] & 0xC0) == 0x80) {
|
54
54
|
*symbol = (((input[0] & 0x07) << 18) |
|
55
|
-
((input[1] &
|
56
|
-
((input[2] &
|
57
|
-
(input[3] &
|
58
|
-
if (*symbol >
|
55
|
+
((input[1] & 0x3F) << 12) |
|
56
|
+
((input[2] & 0x3F) << 6) |
|
57
|
+
(input[3] & 0x3F));
|
58
|
+
if (*symbol > 0xFFFF && *symbol <= 0x10FFFF) {
|
59
59
|
return 4;
|
60
60
|
}
|
61
61
|
}
|
@@ -77,7 +77,7 @@ BROTLI_BOOL BrotliIsMostlyUTF8(
|
|
77
77
|
i += bytes_read;
|
78
78
|
if (symbol < 0x110000) size_utf8 += bytes_read;
|
79
79
|
}
|
80
|
-
return TO_BROTLI_BOOL(size_utf8 > min_fraction * (double)length);
|
80
|
+
return TO_BROTLI_BOOL((double)size_utf8 > min_fraction * (double)length);
|
81
81
|
}
|
82
82
|
|
83
83
|
#if defined(__cplusplus) || defined(c_plusplus)
|