brotli 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.travis.yml +11 -3
- data/Gemfile +2 -0
- data/ext/brotli/brotli.c +279 -0
- data/ext/brotli/brotli.h +2 -0
- data/ext/brotli/buffer.c +95 -0
- data/ext/brotli/buffer.h +19 -0
- data/ext/brotli/extconf.rb +21 -81
- data/lib/brotli/version.rb +1 -1
- data/vendor/brotli/dec/bit_reader.c +5 -5
- data/vendor/brotli/dec/bit_reader.h +15 -15
- data/vendor/brotli/dec/context.h +1 -1
- data/vendor/brotli/dec/decode.c +433 -348
- data/vendor/brotli/dec/decode.h +74 -48
- data/vendor/brotli/dec/huffman.c +5 -4
- data/vendor/brotli/dec/huffman.h +4 -4
- data/vendor/brotli/dec/port.h +2 -95
- data/vendor/brotli/dec/prefix.h +5 -3
- data/vendor/brotli/dec/state.c +15 -27
- data/vendor/brotli/dec/state.h +21 -17
- data/vendor/brotli/dec/transform.h +1 -1
- data/vendor/brotli/enc/backward_references.c +892 -0
- data/vendor/brotli/enc/backward_references.h +85 -102
- data/vendor/brotli/enc/backward_references_inc.h +147 -0
- data/vendor/brotli/enc/bit_cost.c +35 -0
- data/vendor/brotli/enc/bit_cost.h +23 -121
- data/vendor/brotli/enc/bit_cost_inc.h +127 -0
- data/vendor/brotli/enc/block_encoder_inc.h +33 -0
- data/vendor/brotli/enc/block_splitter.c +197 -0
- data/vendor/brotli/enc/block_splitter.h +40 -50
- data/vendor/brotli/enc/block_splitter_inc.h +432 -0
- data/vendor/brotli/enc/brotli_bit_stream.c +1334 -0
- data/vendor/brotli/enc/brotli_bit_stream.h +95 -167
- data/vendor/brotli/enc/cluster.c +56 -0
- data/vendor/brotli/enc/cluster.h +23 -305
- data/vendor/brotli/enc/cluster_inc.h +315 -0
- data/vendor/brotli/enc/command.h +83 -76
- data/vendor/brotli/enc/compress_fragment.c +747 -0
- data/vendor/brotli/enc/compress_fragment.h +48 -37
- data/vendor/brotli/enc/compress_fragment_two_pass.c +557 -0
- data/vendor/brotli/enc/compress_fragment_two_pass.h +37 -26
- data/vendor/brotli/enc/compressor.cc +139 -0
- data/vendor/brotli/enc/compressor.h +146 -0
- data/vendor/brotli/enc/context.h +102 -96
- data/vendor/brotli/enc/dictionary_hash.h +9 -5
- data/vendor/brotli/enc/encode.c +1562 -0
- data/vendor/brotli/enc/encode.h +211 -199
- data/vendor/brotli/enc/encode_parallel.cc +161 -151
- data/vendor/brotli/enc/encode_parallel.h +7 -8
- data/vendor/brotli/enc/entropy_encode.c +501 -0
- data/vendor/brotli/enc/entropy_encode.h +107 -89
- data/vendor/brotli/enc/entropy_encode_static.h +29 -62
- data/vendor/brotli/enc/fast_log.h +26 -20
- data/vendor/brotli/enc/find_match_length.h +23 -20
- data/vendor/brotli/enc/hash.h +614 -871
- data/vendor/brotli/enc/hash_forgetful_chain_inc.h +249 -0
- data/vendor/brotli/enc/hash_longest_match_inc.h +241 -0
- data/vendor/brotli/enc/hash_longest_match_quickly_inc.h +230 -0
- data/vendor/brotli/enc/histogram.c +95 -0
- data/vendor/brotli/enc/histogram.h +49 -83
- data/vendor/brotli/enc/histogram_inc.h +51 -0
- data/vendor/brotli/enc/literal_cost.c +178 -0
- data/vendor/brotli/enc/literal_cost.h +16 -10
- data/vendor/brotli/enc/memory.c +181 -0
- data/vendor/brotli/enc/memory.h +62 -0
- data/vendor/brotli/enc/metablock.c +515 -0
- data/vendor/brotli/enc/metablock.h +87 -57
- data/vendor/brotli/enc/metablock_inc.h +183 -0
- data/vendor/brotli/enc/port.h +73 -47
- data/vendor/brotli/enc/prefix.h +34 -61
- data/vendor/brotli/enc/quality.h +130 -0
- data/vendor/brotli/enc/ringbuffer.h +137 -122
- data/vendor/brotli/enc/{static_dict.cc → static_dict.c} +162 -139
- data/vendor/brotli/enc/static_dict.h +23 -18
- data/vendor/brotli/enc/static_dict_lut.h +11223 -12037
- data/vendor/brotli/enc/streams.cc +7 -7
- data/vendor/brotli/enc/streams.h +32 -32
- data/vendor/brotli/enc/{utf8_util.cc → utf8_util.c} +22 -20
- data/vendor/brotli/enc/utf8_util.h +16 -9
- data/vendor/brotli/enc/write_bits.h +49 -43
- metadata +34 -25
- data/ext/brotli/brotli.cc +0 -181
- data/vendor/brotli/dec/Makefile +0 -12
- data/vendor/brotli/dec/dictionary.c +0 -9466
- data/vendor/brotli/dec/dictionary.h +0 -38
- data/vendor/brotli/dec/types.h +0 -38
- data/vendor/brotli/enc/Makefile +0 -14
- data/vendor/brotli/enc/backward_references.cc +0 -858
- data/vendor/brotli/enc/block_splitter.cc +0 -505
- data/vendor/brotli/enc/brotli_bit_stream.cc +0 -1181
- data/vendor/brotli/enc/compress_fragment.cc +0 -701
- data/vendor/brotli/enc/compress_fragment_two_pass.cc +0 -524
- data/vendor/brotli/enc/dictionary.cc +0 -9466
- data/vendor/brotli/enc/dictionary.h +0 -41
- data/vendor/brotli/enc/encode.cc +0 -1180
- data/vendor/brotli/enc/entropy_encode.cc +0 -480
- data/vendor/brotli/enc/histogram.cc +0 -67
- data/vendor/brotli/enc/literal_cost.cc +0 -165
- data/vendor/brotli/enc/metablock.cc +0 -539
- data/vendor/brotli/enc/transform.h +0 -248
- data/vendor/brotli/enc/types.h +0 -29
@@ -6,88 +6,103 @@
|
|
6
6
|
|
7
7
|
#include "./static_dict.h"
|
8
8
|
|
9
|
-
#include
|
10
|
-
|
11
|
-
#include "./dictionary.h"
|
9
|
+
#include "../common/dictionary.h"
|
12
10
|
#include "./find_match_length.h"
|
11
|
+
#include "./port.h"
|
13
12
|
#include "./static_dict_lut.h"
|
14
|
-
#include "./transform.h"
|
15
13
|
|
16
|
-
|
14
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
15
|
+
extern "C" {
|
16
|
+
#endif
|
17
|
+
|
18
|
+
static const uint8_t kUppercaseFirst = 10;
|
19
|
+
static const uint8_t kOmitLastNTransforms[10] = {
|
20
|
+
0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
|
21
|
+
};
|
17
22
|
|
18
|
-
|
23
|
+
static BROTLI_INLINE uint32_t Hash(const uint8_t *data) {
|
19
24
|
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
|
20
|
-
|
21
|
-
|
25
|
+
/* The higher bits contain more mixture from the multiplication,
|
26
|
+
so we take our results from there. */
|
22
27
|
return h >> (32 - kDictNumBits);
|
23
28
|
}
|
24
29
|
|
25
|
-
|
26
|
-
|
27
|
-
uint32_t match =
|
28
|
-
matches[len] =
|
30
|
+
static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
|
31
|
+
uint32_t* matches) {
|
32
|
+
uint32_t match = (uint32_t)((distance << 5) + len_code);
|
33
|
+
matches[len] = BROTLI_MIN(uint32_t, matches[len], match);
|
29
34
|
}
|
30
35
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
36
|
+
static BROTLI_INLINE size_t DictMatchLength(const uint8_t* data,
|
37
|
+
size_t id,
|
38
|
+
size_t len,
|
39
|
+
size_t maxlen) {
|
35
40
|
const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id;
|
36
41
|
return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data,
|
37
|
-
|
42
|
+
BROTLI_MIN(size_t, len, maxlen));
|
38
43
|
}
|
39
44
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
if (w.transform == 0) {
|
45
|
-
// Match against base dictionary word.
|
46
|
-
return FindMatchLengthWithLimit(dict, data, w.len) == w.len;
|
47
|
-
} else if (w.transform == 10) {
|
48
|
-
// Match against uppercase first transform.
|
49
|
-
// Note that there are only ASCII uppercase words in the lookup table.
|
50
|
-
return (dict[0] >= 'a' && dict[0] <= 'z' &&
|
51
|
-
(dict[0] ^ 32) == data[0] &&
|
52
|
-
FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
|
53
|
-
w.len - 1u);
|
45
|
+
static BROTLI_INLINE BROTLI_BOOL IsMatch(
|
46
|
+
DictWord w, const uint8_t* data, size_t max_length) {
|
47
|
+
if (w.len > max_length) {
|
48
|
+
return BROTLI_FALSE;
|
54
49
|
} else {
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
50
|
+
const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] +
|
51
|
+
(size_t)w.len * (size_t)w.idx;
|
52
|
+
const uint8_t* dict = &kBrotliDictionary[offset];
|
53
|
+
if (w.transform == 0) {
|
54
|
+
/* Match against base dictionary word. */
|
55
|
+
return
|
56
|
+
TO_BROTLI_BOOL(FindMatchLengthWithLimit(dict, data, w.len) == w.len);
|
57
|
+
} else if (w.transform == 10) {
|
58
|
+
/* Match against uppercase first transform.
|
59
|
+
Note that there are only ASCII uppercase words in the lookup table. */
|
60
|
+
return TO_BROTLI_BOOL(dict[0] >= 'a' && dict[0] <= 'z' &&
|
61
|
+
(dict[0] ^ 32) == data[0] &&
|
62
|
+
FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
|
63
|
+
w.len - 1u);
|
64
|
+
} else {
|
65
|
+
/* Match against uppercase all transform.
|
66
|
+
Note that there are only ASCII uppercase words in the lookup table. */
|
67
|
+
size_t i;
|
68
|
+
for (i = 0; i < w.len; ++i) {
|
69
|
+
if (dict[i] >= 'a' && dict[i] <= 'z') {
|
70
|
+
if ((dict[i] ^ 32) != data[i]) return BROTLI_FALSE;
|
71
|
+
} else {
|
72
|
+
if (dict[i] != data[i]) return BROTLI_FALSE;
|
73
|
+
}
|
62
74
|
}
|
75
|
+
return BROTLI_TRUE;
|
63
76
|
}
|
64
|
-
return true;
|
65
77
|
}
|
66
78
|
}
|
67
79
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
const DictWord w = kStaticDictionaryWords[offset + i];
|
80
|
-
const size_t l = w.len;
|
81
|
-
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
|
80
|
+
BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
81
|
+
const uint8_t* data, size_t min_length, size_t max_length,
|
82
|
+
uint32_t* matches) {
|
83
|
+
BROTLI_BOOL has_found_match = BROTLI_FALSE;
|
84
|
+
{
|
85
|
+
size_t offset = kStaticDictionaryBuckets[Hash(data)];
|
86
|
+
BROTLI_BOOL end = !offset;
|
87
|
+
while (!end) {
|
88
|
+
DictWord w = kStaticDictionaryWords[offset++];
|
89
|
+
const size_t l = w.len & 0x7F;
|
90
|
+
const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
|
82
91
|
const size_t id = w.idx;
|
92
|
+
end = !!(w.len & 0x80);
|
93
|
+
w.len = (uint8_t)l;
|
83
94
|
if (w.transform == 0) {
|
84
95
|
const size_t matchlen = DictMatchLength(data, id, l, max_length);
|
85
|
-
|
96
|
+
const uint8_t* s;
|
97
|
+
size_t minlen;
|
98
|
+
size_t maxlen;
|
99
|
+
size_t len;
|
100
|
+
/* Transform "" + kIdentity + "" */
|
86
101
|
if (matchlen == l) {
|
87
102
|
AddMatch(id, l, l, matches);
|
88
|
-
|
103
|
+
has_found_match = BROTLI_TRUE;
|
89
104
|
}
|
90
|
-
|
105
|
+
/* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */
|
91
106
|
if (matchlen >= l - 1) {
|
92
107
|
AddMatch(id + 12 * n, l - 1, l, matches);
|
93
108
|
if (l + 2 < max_length &&
|
@@ -95,21 +110,21 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
|
95
110
|
data[l + 2] == ' ') {
|
96
111
|
AddMatch(id + 49 * n, l + 3, l, matches);
|
97
112
|
}
|
98
|
-
|
113
|
+
has_found_match = BROTLI_TRUE;
|
99
114
|
}
|
100
|
-
|
101
|
-
|
102
|
-
if (l > 9) minlen =
|
103
|
-
|
104
|
-
for (
|
115
|
+
/* Transform "" + kOmitLastN + "" (N = 2 .. 9) */
|
116
|
+
minlen = min_length;
|
117
|
+
if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
|
118
|
+
maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
|
119
|
+
for (len = minlen; len <= maxlen; ++len) {
|
105
120
|
AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);
|
106
|
-
|
121
|
+
has_found_match = BROTLI_TRUE;
|
107
122
|
}
|
108
123
|
if (matchlen < l || l + 6 >= max_length) {
|
109
124
|
continue;
|
110
125
|
}
|
111
|
-
|
112
|
-
|
126
|
+
s = &data[l];
|
127
|
+
/* Transforms "" + kIdentity + <suffix> */
|
113
128
|
if (s[0] == ' ') {
|
114
129
|
AddMatch(id + n, l + 1, l, matches);
|
115
130
|
if (s[1] == 'a') {
|
@@ -127,7 +142,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
|
127
142
|
} else if (s[1] == 'b') {
|
128
143
|
if (s[2] == 'y' && s[3] == ' ') {
|
129
144
|
AddMatch(id + 38 * n, l + 4, l, matches);
|
130
|
-
|
145
|
+
}
|
131
146
|
} else if (s[1] == 'i') {
|
132
147
|
if (s[2] == 'n') {
|
133
148
|
if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
|
@@ -235,7 +250,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
|
235
250
|
} else if (s[0] == 'i') {
|
236
251
|
if (s[1] == 'v') {
|
237
252
|
if (s[2] == 'e' && s[3] == ' ') {
|
238
|
-
|
253
|
+
AddMatch(id + 92 * n, l + 4, l, matches);
|
239
254
|
}
|
240
255
|
} else if (s[1] == 'z') {
|
241
256
|
if (s[2] == 'e' && s[3] == ' ') {
|
@@ -256,75 +271,79 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
|
256
271
|
}
|
257
272
|
}
|
258
273
|
} else {
|
259
|
-
|
260
|
-
|
261
|
-
const
|
274
|
+
/* Set is_all_caps=0 for kUppercaseFirst and
|
275
|
+
is_all_caps=1 otherwise (kUppercaseAll) transform. */
|
276
|
+
const BROTLI_BOOL is_all_caps =
|
277
|
+
TO_BROTLI_BOOL(w.transform != kUppercaseFirst);
|
278
|
+
const uint8_t* s;
|
262
279
|
if (!IsMatch(w, data, max_length)) {
|
263
280
|
continue;
|
264
281
|
}
|
265
|
-
|
266
|
-
AddMatch(id + (
|
267
|
-
|
282
|
+
/* Transform "" + kUppercase{First,All} + "" */
|
283
|
+
AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);
|
284
|
+
has_found_match = BROTLI_TRUE;
|
268
285
|
if (l + 1 >= max_length) {
|
269
286
|
continue;
|
270
287
|
}
|
271
|
-
|
272
|
-
|
288
|
+
/* Transforms "" + kUppercase{First,All} + <suffix> */
|
289
|
+
s = &data[l];
|
273
290
|
if (s[0] == ' ') {
|
274
|
-
AddMatch(id + (
|
291
|
+
AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);
|
275
292
|
} else if (s[0] == '"') {
|
276
|
-
AddMatch(id + (
|
293
|
+
AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);
|
277
294
|
if (s[1] == '>') {
|
278
|
-
AddMatch(id + (
|
295
|
+
AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);
|
279
296
|
}
|
280
297
|
} else if (s[0] == '.') {
|
281
|
-
AddMatch(id + (
|
298
|
+
AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);
|
282
299
|
if (s[1] == ' ') {
|
283
|
-
AddMatch(id + (
|
300
|
+
AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);
|
284
301
|
}
|
285
302
|
} else if (s[0] == ',') {
|
286
|
-
AddMatch(id + (
|
303
|
+
AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);
|
287
304
|
if (s[1] == ' ') {
|
288
|
-
AddMatch(id + (
|
305
|
+
AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);
|
289
306
|
}
|
290
307
|
} else if (s[0] == '\'') {
|
291
|
-
AddMatch(id + (
|
308
|
+
AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);
|
292
309
|
} else if (s[0] == '(') {
|
293
|
-
AddMatch(id + (
|
310
|
+
AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);
|
294
311
|
} else if (s[0] == '=') {
|
295
312
|
if (s[1] == '"') {
|
296
|
-
AddMatch(id + (
|
313
|
+
AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);
|
297
314
|
} else if (s[1] == '\'') {
|
298
|
-
AddMatch(id + (
|
315
|
+
AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);
|
299
316
|
}
|
300
317
|
}
|
301
318
|
}
|
302
319
|
}
|
303
320
|
}
|
304
|
-
|
321
|
+
/* Transforms with prefixes " " and "." */
|
305
322
|
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
const
|
313
|
-
const size_t l = w.len;
|
314
|
-
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
|
323
|
+
BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
|
324
|
+
size_t offset = kStaticDictionaryBuckets[Hash(&data[1])];
|
325
|
+
BROTLI_BOOL end = !offset;
|
326
|
+
while (!end) {
|
327
|
+
DictWord w = kStaticDictionaryWords[offset++];
|
328
|
+
const size_t l = w.len & 0x7F;
|
329
|
+
const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
|
315
330
|
const size_t id = w.idx;
|
331
|
+
end = !!(w.len & 0x80);
|
332
|
+
w.len = (uint8_t)l;
|
316
333
|
if (w.transform == 0) {
|
334
|
+
const uint8_t* s;
|
317
335
|
if (!IsMatch(w, &data[1], max_length - 1)) {
|
318
336
|
continue;
|
319
337
|
}
|
320
|
-
|
338
|
+
/* Transforms " " + kIdentity + "" and "." + kIdentity + "" */
|
321
339
|
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
|
322
|
-
|
340
|
+
has_found_match = BROTLI_TRUE;
|
323
341
|
if (l + 2 >= max_length) {
|
324
342
|
continue;
|
325
343
|
}
|
326
|
-
|
327
|
-
|
344
|
+
/* Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
|
345
|
+
*/
|
346
|
+
s = &data[l + 1];
|
328
347
|
if (s[0] == ' ') {
|
329
348
|
AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
|
330
349
|
} else if (s[0] == '(') {
|
@@ -349,89 +368,91 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
|
349
368
|
}
|
350
369
|
}
|
351
370
|
} else if (is_space) {
|
352
|
-
|
353
|
-
|
354
|
-
const
|
371
|
+
/* Set is_all_caps=0 for kUppercaseFirst and
|
372
|
+
is_all_caps=1 otherwise (kUppercaseAll) transform. */
|
373
|
+
const BROTLI_BOOL is_all_caps =
|
374
|
+
TO_BROTLI_BOOL(w.transform != kUppercaseFirst);
|
375
|
+
const uint8_t* s;
|
355
376
|
if (!IsMatch(w, &data[1], max_length - 1)) {
|
356
377
|
continue;
|
357
378
|
}
|
358
|
-
|
359
|
-
AddMatch(id + (
|
360
|
-
|
379
|
+
/* Transforms " " + kUppercase{First,All} + "" */
|
380
|
+
AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);
|
381
|
+
has_found_match = BROTLI_TRUE;
|
361
382
|
if (l + 2 >= max_length) {
|
362
383
|
continue;
|
363
384
|
}
|
364
|
-
|
365
|
-
|
385
|
+
/* Transforms " " + kUppercase{First,All} + <suffix> */
|
386
|
+
s = &data[l + 1];
|
366
387
|
if (s[0] == ' ') {
|
367
|
-
AddMatch(id + (
|
388
|
+
AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);
|
368
389
|
} else if (s[0] == ',') {
|
369
|
-
if (!
|
390
|
+
if (!is_all_caps) {
|
370
391
|
AddMatch(id + 109 * n, l + 2, l, matches);
|
371
|
-
|
392
|
+
}
|
372
393
|
if (s[1] == ' ') {
|
373
|
-
AddMatch(id + (
|
394
|
+
AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);
|
374
395
|
}
|
375
396
|
} else if (s[0] == '.') {
|
376
|
-
AddMatch(id + (
|
397
|
+
AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);
|
377
398
|
if (s[1] == ' ') {
|
378
|
-
AddMatch(id + (
|
399
|
+
AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);
|
379
400
|
}
|
380
401
|
} else if (s[0] == '=') {
|
381
402
|
if (s[1] == '"') {
|
382
|
-
AddMatch(id + (
|
403
|
+
AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);
|
383
404
|
} else if (s[1] == '\'') {
|
384
|
-
AddMatch(id + (
|
405
|
+
AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);
|
385
406
|
}
|
386
407
|
}
|
387
408
|
}
|
388
409
|
}
|
389
410
|
}
|
390
411
|
if (max_length >= 6) {
|
391
|
-
|
412
|
+
/* Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" */
|
392
413
|
if ((data[1] == ' ' &&
|
393
414
|
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
|
394
415
|
(data[0] == 0xc2 && data[1] == 0xa0)) {
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
const
|
401
|
-
const size_t l = w.len;
|
402
|
-
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
|
416
|
+
size_t offset = kStaticDictionaryBuckets[Hash(&data[2])];
|
417
|
+
BROTLI_BOOL end = !offset;
|
418
|
+
while (!end) {
|
419
|
+
DictWord w = kStaticDictionaryWords[offset++];
|
420
|
+
const size_t l = w.len & 0x7F;
|
421
|
+
const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
|
403
422
|
const size_t id = w.idx;
|
423
|
+
end = !!(w.len & 0x80);
|
424
|
+
w.len = (uint8_t)l;
|
404
425
|
if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {
|
405
426
|
if (data[0] == 0xc2) {
|
406
427
|
AddMatch(id + 102 * n, l + 2, l, matches);
|
407
|
-
|
428
|
+
has_found_match = BROTLI_TRUE;
|
408
429
|
} else if (l + 2 < max_length && data[l + 2] == ' ') {
|
409
430
|
size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
|
410
431
|
AddMatch(id + t * n, l + 3, l, matches);
|
411
|
-
|
432
|
+
has_found_match = BROTLI_TRUE;
|
412
433
|
}
|
413
434
|
}
|
414
435
|
}
|
415
436
|
}
|
416
437
|
}
|
417
438
|
if (max_length >= 9) {
|
418
|
-
|
439
|
+
/* Transforms with prefixes " the " and ".com/" */
|
419
440
|
if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
|
420
441
|
data[3] == 'e' && data[4] == ' ') ||
|
421
442
|
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
|
422
443
|
data[3] == 'm' && data[4] == '/')) {
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
const
|
429
|
-
const size_t l = w.len;
|
430
|
-
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
|
444
|
+
size_t offset = kStaticDictionaryBuckets[Hash(&data[5])];
|
445
|
+
BROTLI_BOOL end = !offset;
|
446
|
+
while (!end) {
|
447
|
+
DictWord w = kStaticDictionaryWords[offset++];
|
448
|
+
const size_t l = w.len & 0x7F;
|
449
|
+
const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
|
431
450
|
const size_t id = w.idx;
|
451
|
+
end = !!(w.len & 0x80);
|
452
|
+
w.len = (uint8_t)l;
|
432
453
|
if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {
|
433
454
|
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
|
434
|
-
|
455
|
+
has_found_match = BROTLI_TRUE;
|
435
456
|
if (l + 5 < max_length) {
|
436
457
|
const uint8_t* s = &data[l + 5];
|
437
458
|
if (data[0] == ' ') {
|
@@ -449,7 +470,9 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
|
449
470
|
}
|
450
471
|
}
|
451
472
|
}
|
452
|
-
return
|
473
|
+
return has_found_match;
|
453
474
|
}
|
454
475
|
|
455
|
-
|
476
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
477
|
+
} /* extern "C" */
|
478
|
+
#endif
|
@@ -4,29 +4,34 @@
|
|
4
4
|
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
5
|
*/
|
6
6
|
|
7
|
-
|
7
|
+
/* Class to model the static dictionary. */
|
8
8
|
|
9
9
|
#ifndef BROTLI_ENC_STATIC_DICT_H_
|
10
10
|
#define BROTLI_ENC_STATIC_DICT_H_
|
11
11
|
|
12
|
-
#include "
|
12
|
+
#include "../common/types.h"
|
13
|
+
#include "./port.h"
|
13
14
|
|
14
|
-
|
15
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
16
|
+
extern "C" {
|
17
|
+
#endif
|
15
18
|
|
16
|
-
|
19
|
+
#define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37
|
17
20
|
static const uint32_t kInvalidMatch = 0xfffffff;
|
18
21
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
#endif
|
22
|
+
/* Matches data against static dictionary words, and for each length l,
|
23
|
+
for which a match is found, updates matches[l] to be the minimum possible
|
24
|
+
(distance << 5) + len_code.
|
25
|
+
Returns 1 if matches have been found, otherwise 0.
|
26
|
+
Prerequisites:
|
27
|
+
matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long
|
28
|
+
all elements are initialized to kInvalidMatch */
|
29
|
+
BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
30
|
+
const uint8_t* data, size_t min_length, size_t max_length,
|
31
|
+
uint32_t* matches);
|
32
|
+
|
33
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
34
|
+
} /* extern "C" */
|
35
|
+
#endif
|
36
|
+
|
37
|
+
#endif /* BROTLI_ENC_STATIC_DICT_H_ */
|