extbrotli 0.0.1.PROTOTYPE
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +28 -0
- data/README.md +67 -0
- data/Rakefile +158 -0
- data/contrib/brotli/LICENSE +202 -0
- data/contrib/brotli/README.md +18 -0
- data/contrib/brotli/dec/bit_reader.c +55 -0
- data/contrib/brotli/dec/bit_reader.h +256 -0
- data/contrib/brotli/dec/context.h +260 -0
- data/contrib/brotli/dec/decode.c +1573 -0
- data/contrib/brotli/dec/decode.h +160 -0
- data/contrib/brotli/dec/dictionary.h +9494 -0
- data/contrib/brotli/dec/huffman.c +325 -0
- data/contrib/brotli/dec/huffman.h +77 -0
- data/contrib/brotli/dec/port.h +148 -0
- data/contrib/brotli/dec/prefix.h +756 -0
- data/contrib/brotli/dec/state.c +149 -0
- data/contrib/brotli/dec/state.h +185 -0
- data/contrib/brotli/dec/streams.c +99 -0
- data/contrib/brotli/dec/streams.h +100 -0
- data/contrib/brotli/dec/transform.h +315 -0
- data/contrib/brotli/dec/types.h +36 -0
- data/contrib/brotli/enc/backward_references.cc +769 -0
- data/contrib/brotli/enc/backward_references.h +50 -0
- data/contrib/brotli/enc/bit_cost.h +147 -0
- data/contrib/brotli/enc/block_splitter.cc +418 -0
- data/contrib/brotli/enc/block_splitter.h +78 -0
- data/contrib/brotli/enc/brotli_bit_stream.cc +884 -0
- data/contrib/brotli/enc/brotli_bit_stream.h +149 -0
- data/contrib/brotli/enc/cluster.h +290 -0
- data/contrib/brotli/enc/command.h +140 -0
- data/contrib/brotli/enc/context.h +185 -0
- data/contrib/brotli/enc/dictionary.h +9485 -0
- data/contrib/brotli/enc/dictionary_hash.h +4125 -0
- data/contrib/brotli/enc/encode.cc +715 -0
- data/contrib/brotli/enc/encode.h +196 -0
- data/contrib/brotli/enc/encode_parallel.cc +354 -0
- data/contrib/brotli/enc/encode_parallel.h +37 -0
- data/contrib/brotli/enc/entropy_encode.cc +492 -0
- data/contrib/brotli/enc/entropy_encode.h +88 -0
- data/contrib/brotli/enc/fast_log.h +179 -0
- data/contrib/brotli/enc/find_match_length.h +87 -0
- data/contrib/brotli/enc/hash.h +686 -0
- data/contrib/brotli/enc/histogram.cc +76 -0
- data/contrib/brotli/enc/histogram.h +100 -0
- data/contrib/brotli/enc/literal_cost.cc +172 -0
- data/contrib/brotli/enc/literal_cost.h +38 -0
- data/contrib/brotli/enc/metablock.cc +544 -0
- data/contrib/brotli/enc/metablock.h +88 -0
- data/contrib/brotli/enc/port.h +151 -0
- data/contrib/brotli/enc/prefix.h +85 -0
- data/contrib/brotli/enc/ringbuffer.h +108 -0
- data/contrib/brotli/enc/static_dict.cc +441 -0
- data/contrib/brotli/enc/static_dict.h +40 -0
- data/contrib/brotli/enc/static_dict_lut.h +12063 -0
- data/contrib/brotli/enc/streams.cc +127 -0
- data/contrib/brotli/enc/streams.h +129 -0
- data/contrib/brotli/enc/transform.h +250 -0
- data/contrib/brotli/enc/write_bits.h +91 -0
- data/ext/extbrotli.cc +24 -0
- data/ext/extbrotli.h +73 -0
- data/ext/extconf.rb +35 -0
- data/ext/lldecoder.c +220 -0
- data/ext/llencoder.cc +433 -0
- data/gemstub.rb +21 -0
- data/lib/extbrotli.rb +243 -0
- data/lib/extbrotli/version.rb +3 -0
- metadata +140 -0
@@ -0,0 +1,88 @@
|
|
1
|
+
// Copyright 2015 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Algorithms for distributing the literals and commands of a metablock between
|
16
|
+
// block types and contexts.
|
17
|
+
|
18
|
+
#ifndef BROTLI_ENC_METABLOCK_H_
|
19
|
+
#define BROTLI_ENC_METABLOCK_H_
|
20
|
+
|
21
|
+
#include <vector>
|
22
|
+
|
23
|
+
#include "./command.h"
|
24
|
+
#include "./histogram.h"
|
25
|
+
|
26
|
+
namespace brotli {
|
27
|
+
|
28
|
+
// Result of splitting one symbol stream into blocks.
// NOTE(review): presumably types[i] and lengths[i] describe the i-th block and
// num_types is the number of distinct block types -- confirm against the
// block splitter implementation.
struct BlockSplit {
  // Starts out empty: no block types and no recorded blocks.
  BlockSplit() : num_types(0), types(), lengths() {}

  int num_types;
  std::vector<int> types;
  std::vector<int> lengths;
};
|
35
|
+
|
36
|
+
struct MetaBlockSplit {
|
37
|
+
BlockSplit literal_split;
|
38
|
+
BlockSplit command_split;
|
39
|
+
BlockSplit distance_split;
|
40
|
+
std::vector<int> literal_context_map;
|
41
|
+
std::vector<int> distance_context_map;
|
42
|
+
std::vector<HistogramLiteral> literal_histograms;
|
43
|
+
std::vector<HistogramCommand> command_histograms;
|
44
|
+
std::vector<HistogramDistance> distance_histograms;
|
45
|
+
};
|
46
|
+
|
47
|
+
// Uses the slow shortest-path block splitter and does context clustering.
|
48
|
+
void BuildMetaBlock(const uint8_t* ringbuffer,
|
49
|
+
const size_t pos,
|
50
|
+
const size_t mask,
|
51
|
+
uint8_t prev_byte,
|
52
|
+
uint8_t prev_byte2,
|
53
|
+
const Command* cmds,
|
54
|
+
size_t num_commands,
|
55
|
+
int literal_context_mode,
|
56
|
+
MetaBlockSplit* mb);
|
57
|
+
|
58
|
+
// Uses a fast greedy block splitter that tries to merge current block with the
|
59
|
+
// last or the second last block and does not do any context modeling.
|
60
|
+
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
|
61
|
+
size_t pos,
|
62
|
+
size_t mask,
|
63
|
+
const Command *commands,
|
64
|
+
size_t n_commands,
|
65
|
+
MetaBlockSplit* mb);
|
66
|
+
|
67
|
+
// Uses a fast greedy block splitter that tries to merge current block with the
|
68
|
+
// last or the second last block and uses a static context clustering which
|
69
|
+
// is the same for all block types.
|
70
|
+
void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
|
71
|
+
size_t pos,
|
72
|
+
size_t mask,
|
73
|
+
uint8_t prev_byte,
|
74
|
+
uint8_t prev_byte2,
|
75
|
+
int literal_context_mode,
|
76
|
+
int num_contexts,
|
77
|
+
const int* static_context_map,
|
78
|
+
const Command *commands,
|
79
|
+
size_t n_commands,
|
80
|
+
MetaBlockSplit* mb);
|
81
|
+
|
82
|
+
void OptimizeHistograms(int num_direct_distance_codes,
|
83
|
+
int distance_postfix_bits,
|
84
|
+
MetaBlockSplit* mb);
|
85
|
+
|
86
|
+
} // namespace brotli
|
87
|
+
|
88
|
+
#endif // BROTLI_ENC_METABLOCK_H_
|
@@ -0,0 +1,151 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Macros for endianness, branch prediction and unaligned loads and stores.
|
16
|
+
|
17
|
+
#ifndef BROTLI_ENC_PORT_H_
|
18
|
+
#define BROTLI_ENC_PORT_H_
|
19
|
+
|
20
|
+
#include <string.h>
|
21
|
+
|
22
|
+
// Pull in the platform header that describes byte order, where one is known.
#if defined(OS_LINUX) || defined(OS_CYGWIN)
#include <endian.h>
#elif defined(OS_FREEBSD)
#include <machine/endian.h>
#elif defined(OS_MACOSX)
#include <machine/endian.h>
/* Mirror the Linux spelling of the byte-order macros. */
#define __BYTE_ORDER BYTE_ORDER
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#define __BIG_ENDIAN BIG_ENDIAN
#endif

// Define IS_LITTLE_ENDIAN or IS_BIG_ENDIAN. Prefer the __BYTE_ORDER family of
// macros (available when an endian header was included above); otherwise fall
// back to the compiler-provided __LITTLE_ENDIAN__ / __BIG_ENDIAN__ macros.
#if defined(__BYTE_ORDER)

#if __BYTE_ORDER == __LITTLE_ENDIAN
#define IS_LITTLE_ENDIAN
#endif

#if __BYTE_ORDER == __BIG_ENDIAN
#define IS_BIG_ENDIAN
#endif

#elif defined(__LITTLE_ENDIAN__)
#define IS_LITTLE_ENDIAN
#elif defined(__BIG_ENDIAN__)
#define IS_BIG_ENDIAN
#endif  // __BYTE_ORDER

// Enable the little-endian optimization for the x64 architecture on Windows.
#if (defined(_WIN32) || defined(_WIN64)) && defined(_M_X64)
#define IS_LITTLE_ENDIAN
#endif
|
59
|
+
|
60
|
+
/* Compatibility with non-clang compilers. */
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

// Branch-prediction hints.
//   PREDICT_FALSE(x): the condition x is expected to be false.
//   PREDICT_TRUE(x):  the condition x is expected to be true.
// When __builtin_expect is available, both macros normalize x with !!(x)
// before handing it over. __builtin_expect takes a `long`, so passing x
// unmodified would truncate values wider than `long` and would not compile
// for pointer arguments. After normalization the macros evaluate to 0 or 1.
// On other compilers the macros are plain pass-throughs.
#if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ > 95) || \
    (defined(__llvm__) && __has_builtin(__builtin_expect))
#define PREDICT_FALSE(x) (__builtin_expect(!!(x), 0))
#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
#define PREDICT_FALSE(x) (x)
#define PREDICT_TRUE(x) (x)
#endif
|
73
|
+
|
74
|
+
// Portable handling of unaligned loads, stores, and copies.
|
75
|
+
// On some platforms, like ARM, the copy functions can be more efficient
|
76
|
+
// than a load and a store.
|
77
|
+
|
78
|
+
#if defined(ARCH_PIII) || defined(ARCH_ATHLON) || \
|
79
|
+
defined(ARCH_K8) || defined(_ARCH_PPC)
|
80
|
+
|
81
|
+
// x86 and x86-64 can perform unaligned loads/stores directly;
|
82
|
+
// modern PowerPC hardware can also do unaligned integer loads and stores;
|
83
|
+
// but note: the FPU still sends unaligned loads and stores to a trap handler!
|
84
|
+
|
85
|
+
// Direct (possibly unaligned) 32- and 64-bit loads through a cast pointer.
#define BROTLI_UNALIGNED_LOAD32(ptr) (*reinterpret_cast<const uint32_t *>(ptr))
#define BROTLI_UNALIGNED_LOAD64(ptr) (*reinterpret_cast<const uint64_t *>(ptr))

// Direct (possibly unaligned) 32- and 64-bit stores through a cast pointer.
#define BROTLI_UNALIGNED_STORE32(ptr, value) \
  (*reinterpret_cast<uint32_t *>(ptr) = (value))
#define BROTLI_UNALIGNED_STORE64(ptr, value) \
  (*reinterpret_cast<uint64_t *>(ptr) = (value))
|
92
|
+
|
93
|
+
#elif defined(__arm__) && \
|
94
|
+
!defined(__ARM_ARCH_5__) && \
|
95
|
+
!defined(__ARM_ARCH_5T__) && \
|
96
|
+
!defined(__ARM_ARCH_5TE__) && \
|
97
|
+
!defined(__ARM_ARCH_5TEJ__) && \
|
98
|
+
!defined(__ARM_ARCH_6__) && \
|
99
|
+
!defined(__ARM_ARCH_6J__) && \
|
100
|
+
!defined(__ARM_ARCH_6K__) && \
|
101
|
+
!defined(__ARM_ARCH_6Z__) && \
|
102
|
+
!defined(__ARM_ARCH_6ZK__) && \
|
103
|
+
!defined(__ARM_ARCH_6T2__)
|
104
|
+
|
105
|
+
// ARMv7 and newer support native unaligned accesses, but only of 16-bit
|
106
|
+
// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
|
107
|
+
// do an unaligned read and rotate the words around a bit, or do the reads very
|
108
|
+
// slowly (trip through kernel mode).
|
109
|
+
|
110
|
+
// Direct (possibly unaligned) 32-bit load and store through a cast pointer.
#define BROTLI_UNALIGNED_LOAD32(ptr) (*reinterpret_cast<const uint32_t *>(ptr))
#define BROTLI_UNALIGNED_STORE32(ptr, value) \
  (*reinterpret_cast<uint32_t *>(ptr) = (value))
|
113
|
+
|
114
|
+
// Reads a 64-bit value from p by copying its bytes, so p needs no particular
// alignment.
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
  uint64_t value;
  memcpy(&value, p, sizeof(uint64_t));
  return value;
}
|
119
|
+
|
120
|
+
// Writes the 64-bit value v to p by copying its bytes, so p needs no
// particular alignment.
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
  memcpy(p, &v, sizeof(uint64_t));
}
|
123
|
+
|
124
|
+
#else
|
125
|
+
|
126
|
+
// These functions are provided for architectures that don't support
|
127
|
+
// unaligned loads and stores.
|
128
|
+
|
129
|
+
// Reads a 32-bit value from p by copying its bytes, so p needs no particular
// alignment.
inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
  uint32_t value;
  memcpy(&value, p, sizeof(uint32_t));
  return value;
}
|
134
|
+
|
135
|
+
// Reads a 64-bit value from p by copying its bytes, so p needs no particular
// alignment.
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
  uint64_t loaded;
  memcpy(&loaded, p, sizeof(loaded));
  return loaded;
}
|
140
|
+
|
141
|
+
// Writes the 32-bit value v to p by copying its bytes, so p needs no
// particular alignment.
inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
  memcpy(p, &v, sizeof(uint32_t));
}
|
144
|
+
|
145
|
+
// Writes the 64-bit value v to p by copying its bytes, so p needs no
// particular alignment.
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
  const uint64_t value = v;
  memcpy(p, &value, sizeof(value));
}
|
148
|
+
|
149
|
+
#endif
|
150
|
+
|
151
|
+
#endif // BROTLI_ENC_PORT_H_
|
@@ -0,0 +1,85 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Functions for encoding of integers into prefix codes, the amount of extra
|
16
|
+
// bits, and the actual values of the extra bits.
|
17
|
+
|
18
|
+
#ifndef BROTLI_ENC_PREFIX_H_
|
19
|
+
#define BROTLI_ENC_PREFIX_H_
|
20
|
+
|
21
|
+
#include <stdint.h>
|
22
|
+
#include "./fast_log.h"
|
23
|
+
|
24
|
+
namespace brotli {
|
25
|
+
|
26
|
+
// Sizes of the prefix-code alphabets used by the encoder.
// Insert-length and copy-length prefixes.
static const int kNumInsertLenPrefixes = 24;
static const int kNumCopyLenPrefixes = 24;
// Command prefixes.
static const int kNumCommandPrefixes = 704;
// Block-length prefixes; also the size of kBlockLengthPrefixCode.
static const int kNumBlockLenPrefixes = 26;
// Distance codes: the short codes come first in the distance alphabet.
static const int kNumDistanceShortCodes = 16;
static const int kNumDistancePrefixes = 520;
|
32
|
+
|
33
|
+
// The half-open interval of values covered by one prefix code:
//   [offset, offset + 2^nbits)
struct PrefixCodeRange {
  int offset;  // smallest value covered by the code
  int nbits;   // number of extra bits that select a value within the range
};
|
39
|
+
|
40
|
+
// Value ranges of the block-length prefix codes, ordered by increasing offset.
// Entry i covers [offset, offset + 2^nbits); consecutive entries are adjacent.
static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
  {    1,  2}, {    5,  2},
  {    9,  2}, {   13,  2},
  {   17,  3}, {   25,  3},
  {   33,  3}, {   41,  3},
  {   49,  4}, {   65,  4},
  {   81,  4}, {   97,  4},
  {  113,  5}, {  145,  5},
  {  177,  5}, {  209,  5},
  {  241,  6}, {  305,  6},
  {  369,  7}, {  497,  8},
  {  753,  9}, { 1265, 10},
  { 2289, 11}, { 4337, 12},
  { 8433, 13}, {16625, 24}
};
|
49
|
+
|
50
|
+
inline void GetBlockLengthPrefixCode(int len,
|
51
|
+
int* code, int* n_extra, int* extra) {
|
52
|
+
*code = 0;
|
53
|
+
while (*code < 25 && len >= kBlockLengthPrefixCode[*code + 1].offset) {
|
54
|
+
++(*code);
|
55
|
+
}
|
56
|
+
*n_extra = kBlockLengthPrefixCode[*code].nbits;
|
57
|
+
*extra = len - kBlockLengthPrefixCode[*code].offset;
|
58
|
+
}
|
59
|
+
|
60
|
+
inline void PrefixEncodeCopyDistance(int distance_code,
|
61
|
+
int num_direct_codes,
|
62
|
+
int postfix_bits,
|
63
|
+
uint16_t* code,
|
64
|
+
uint32_t* extra_bits) {
|
65
|
+
if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
|
66
|
+
*code = distance_code;
|
67
|
+
*extra_bits = 0;
|
68
|
+
return;
|
69
|
+
}
|
70
|
+
distance_code -= kNumDistanceShortCodes + num_direct_codes;
|
71
|
+
distance_code += (1 << (postfix_bits + 2));
|
72
|
+
int bucket = Log2Floor(distance_code) - 1;
|
73
|
+
int postfix_mask = (1 << postfix_bits) - 1;
|
74
|
+
int postfix = distance_code & postfix_mask;
|
75
|
+
int prefix = (distance_code >> bucket) & 1;
|
76
|
+
int offset = (2 + prefix) << bucket;
|
77
|
+
int nbits = bucket - postfix_bits;
|
78
|
+
*code = kNumDistanceShortCodes + num_direct_codes +
|
79
|
+
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix;
|
80
|
+
*extra_bits = (nbits << 24) | ((distance_code - offset) >> postfix_bits);
|
81
|
+
}
|
82
|
+
|
83
|
+
} // namespace brotli
|
84
|
+
|
85
|
+
#endif // BROTLI_ENC_PREFIX_H_
|
@@ -0,0 +1,108 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Sliding window over the input data.
|
16
|
+
|
17
|
+
#ifndef BROTLI_ENC_RINGBUFFER_H_
|
18
|
+
#define BROTLI_ENC_RINGBUFFER_H_
|
19
|
+
|
20
|
+
#include <stddef.h>
|
21
|
+
#include <stdint.h>
|
22
|
+
|
23
|
+
#include "./port.h"
|
24
|
+
|
25
|
+
namespace brotli {
|
26
|
+
|
27
|
+
// A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
|
28
|
+
// data in a circular manner: writing a byte writes it to
|
29
|
+
// `position() % (1 << window_bits)'. For convenience, the RingBuffer array
|
30
|
+
// contains another copy of the first `1 << tail_bits' bytes:
|
31
|
+
// buffer_[i] == buffer_[i + (1 << window_bits)] if i < (1 << tail_bits).
|
32
|
+
class RingBuffer {
|
33
|
+
public:
|
34
|
+
RingBuffer(int window_bits, int tail_bits)
|
35
|
+
: window_bits_(window_bits),
|
36
|
+
mask_((1 << window_bits) - 1),
|
37
|
+
tail_size_(1 << tail_bits),
|
38
|
+
pos_(0) {
|
39
|
+
static const int kSlackForEightByteHashingEverywhere = 7;
|
40
|
+
const int buflen = (1 << window_bits_) + tail_size_;
|
41
|
+
buffer_ = new uint8_t[buflen + kSlackForEightByteHashingEverywhere];
|
42
|
+
for (int i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
|
43
|
+
buffer_[buflen + i] = 0;
|
44
|
+
}
|
45
|
+
}
|
46
|
+
~RingBuffer() {
|
47
|
+
delete [] buffer_;
|
48
|
+
}
|
49
|
+
|
50
|
+
// Push bytes into the ring buffer.
|
51
|
+
void Write(const uint8_t *bytes, size_t n) {
|
52
|
+
const size_t masked_pos = pos_ & mask_;
|
53
|
+
// The length of the writes is limited so that we do not need to worry
|
54
|
+
// about a write
|
55
|
+
WriteTail(bytes, n);
|
56
|
+
if (PREDICT_TRUE(masked_pos + n <= (1 << window_bits_))) {
|
57
|
+
// A single write fits.
|
58
|
+
memcpy(&buffer_[masked_pos], bytes, n);
|
59
|
+
} else {
|
60
|
+
// Split into two writes.
|
61
|
+
// Copy into the end of the buffer, including the tail buffer.
|
62
|
+
memcpy(&buffer_[masked_pos], bytes,
|
63
|
+
std::min(n, ((1 << window_bits_) + tail_size_) - masked_pos));
|
64
|
+
// Copy into the begining of the buffer
|
65
|
+
memcpy(&buffer_[0], bytes + ((1 << window_bits_) - masked_pos),
|
66
|
+
n - ((1 << window_bits_) - masked_pos));
|
67
|
+
}
|
68
|
+
pos_ += n;
|
69
|
+
}
|
70
|
+
|
71
|
+
void Reset() {
|
72
|
+
pos_ = 0;
|
73
|
+
}
|
74
|
+
|
75
|
+
// Logical cursor position in the ring buffer.
|
76
|
+
size_t position() const { return pos_; }
|
77
|
+
|
78
|
+
// Bit mask for getting the physical position for a logical position.
|
79
|
+
size_t mask() const { return mask_; }
|
80
|
+
|
81
|
+
uint8_t *start() { return &buffer_[0]; }
|
82
|
+
const uint8_t *start() const { return &buffer_[0]; }
|
83
|
+
|
84
|
+
private:
|
85
|
+
void WriteTail(const uint8_t *bytes, size_t n) {
|
86
|
+
const size_t masked_pos = pos_ & mask_;
|
87
|
+
if (PREDICT_FALSE(masked_pos < tail_size_)) {
|
88
|
+
// Just fill the tail buffer with the beginning data.
|
89
|
+
const size_t p = (1 << window_bits_) + masked_pos;
|
90
|
+
memcpy(&buffer_[p], bytes, std::min(n, tail_size_ - masked_pos));
|
91
|
+
}
|
92
|
+
}
|
93
|
+
|
94
|
+
// Size of the ringbuffer is (1 << window_bits) + tail_size_.
|
95
|
+
const int window_bits_;
|
96
|
+
const size_t mask_;
|
97
|
+
const size_t tail_size_;
|
98
|
+
|
99
|
+
// Position to write in the ring buffer.
|
100
|
+
size_t pos_;
|
101
|
+
// The actual ring buffer containing the data and the copy of the beginning
|
102
|
+
// as a tail.
|
103
|
+
uint8_t *buffer_;
|
104
|
+
};
|
105
|
+
|
106
|
+
} // namespace brotli
|
107
|
+
|
108
|
+
#endif // BROTLI_ENC_RINGBUFFER_H_
|
@@ -0,0 +1,441 @@
|
|
1
|
+
#include "./static_dict.h"
|
2
|
+
|
3
|
+
#include <algorithm>
|
4
|
+
|
5
|
+
#include "./dictionary.h"
|
6
|
+
#include "./find_match_length.h"
|
7
|
+
#include "./static_dict_lut.h"
|
8
|
+
#include "./transform.h"
|
9
|
+
|
10
|
+
namespace brotli {
|
11
|
+
|
12
|
+
inline uint32_t Hash(const uint8_t *data) {
|
13
|
+
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
|
14
|
+
// The higher bits contain more mixture from the multiplication,
|
15
|
+
// so we take our results from there.
|
16
|
+
return h >> (32 - kDictNumBits);
|
17
|
+
}
|
18
|
+
|
19
|
+
// Records a match of length len: matches[len] keeps the smallest packed value
// (distance << 5) + len_code seen so far.
inline void AddMatch(int distance, int len, int len_code, int* matches) {
  const int candidate = (distance << 5) + len_code;
  if (candidate < matches[len]) {
    matches[len] = candidate;
  }
}
|
22
|
+
|
23
|
+
inline int DictMatchLength(const uint8_t* data, int id, int len, int maxlen) {
|
24
|
+
const int offset = kBrotliDictionaryOffsetsByLength[len] + len * id;
|
25
|
+
return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data,
|
26
|
+
std::min(len, maxlen));
|
27
|
+
}
|
28
|
+
|
29
|
+
// Returns true if data starts with dictionary word w after w's transform has
// been applied: identity (transform 0), uppercase-first (transform 10), or
// uppercase-all (any other value). Words longer than max_length never match.
inline bool IsMatch(DictWord w, const uint8_t* data, int max_length) {
  if (w.len > max_length) {
    return false;
  }
  const int offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx;
  const uint8_t* dict = &kBrotliDictionary[offset];

  if (w.transform == 0) {
    // Match against base dictionary word.
    return FindMatchLengthWithLimit(dict, data, w.len) == w.len;
  }

  if (w.transform == 10) {
    // Match against uppercase first transform.
    // Note that there are only ASCII uppercase words in the lookup table.
    if (!(dict[0] >= 'a' && dict[0] <= 'z')) {
      return false;
    }
    // XOR with 32 flips the ASCII case bit.
    if ((dict[0] ^ 32) != data[0]) {
      return false;
    }
    return FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1) ==
           w.len - 1;
  }

  // Match against uppercase all transform.
  // Note that there are only ASCII uppercase words in the lookup table.
  for (int i = 0; i < w.len; ++i) {
    const bool is_lower = (dict[i] >= 'a' && dict[i] <= 'z');
    const int expected = is_lower ? (dict[i] ^ 32) : dict[i];
    if (expected != data[i]) {
      return false;
    }
  }
  return true;
}
|
56
|
+
|
57
|
+
bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
58
|
+
int min_length,
|
59
|
+
int max_length,
|
60
|
+
int* matches) {
|
61
|
+
bool found_match = false;
|
62
|
+
uint32_t key = Hash(data);
|
63
|
+
uint32_t bucket = kStaticDictionaryBuckets[key];
|
64
|
+
if (bucket != 0) {
|
65
|
+
int num = bucket & 0xff;
|
66
|
+
int offset = bucket >> 8;
|
67
|
+
for (int i = 0; i < num; ++i) {
|
68
|
+
const DictWord w = kStaticDictionaryWords[offset + i];
|
69
|
+
const int l = w.len;
|
70
|
+
const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
|
71
|
+
const int id = w.idx;
|
72
|
+
if (w.transform == 0) {
|
73
|
+
const int matchlen = DictMatchLength(data, id, l, max_length);
|
74
|
+
// Transform "" + kIdentity + ""
|
75
|
+
if (matchlen == l) {
|
76
|
+
AddMatch(id, l, l, matches);
|
77
|
+
found_match = true;
|
78
|
+
}
|
79
|
+
// Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing "
|
80
|
+
if (matchlen >= l - 1) {
|
81
|
+
AddMatch(id + 12 * n, l - 1, l, matches);
|
82
|
+
if (l + 2 < max_length &&
|
83
|
+
data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
|
84
|
+
data[l + 2] == ' ') {
|
85
|
+
AddMatch(id + 49 * n, l + 3, l, matches);
|
86
|
+
}
|
87
|
+
found_match = true;
|
88
|
+
}
|
89
|
+
// Transform "" + kOmitLastN + "" (N = 2 .. 9)
|
90
|
+
int minlen = std::max<int>(min_length, l - 9);
|
91
|
+
int maxlen = std::min<int>(matchlen, l - 2);
|
92
|
+
for (int len = minlen; len <= maxlen; ++len) {
|
93
|
+
AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);
|
94
|
+
found_match = true;
|
95
|
+
}
|
96
|
+
if (matchlen < l || l + 6 >= max_length) {
|
97
|
+
continue;
|
98
|
+
}
|
99
|
+
const uint8_t* s = &data[l];
|
100
|
+
// Transforms "" + kIdentity + <suffix>
|
101
|
+
if (s[0] == ' ') {
|
102
|
+
AddMatch(id + n, l + 1, l, matches);
|
103
|
+
if (s[1] == 'a') {
|
104
|
+
if (s[2] == ' ') {
|
105
|
+
AddMatch(id + 28 * n, l + 3, l, matches);
|
106
|
+
} else if (s[2] == 's') {
|
107
|
+
if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
|
108
|
+
} else if (s[2] == 't') {
|
109
|
+
if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
|
110
|
+
} else if (s[2] == 'n') {
|
111
|
+
if (s[3] == 'd' && s[4] == ' ') {
|
112
|
+
AddMatch(id + 10 * n, l + 5, l, matches);
|
113
|
+
}
|
114
|
+
}
|
115
|
+
} else if (s[1] == 'b') {
|
116
|
+
if (s[2] == 'y' && s[3] == ' ') {
|
117
|
+
AddMatch(id + 38 * n, l + 4, l, matches);
|
118
|
+
}
|
119
|
+
} else if (s[1] == 'i') {
|
120
|
+
if (s[2] == 'n') {
|
121
|
+
if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
|
122
|
+
} else if (s[2] == 's') {
|
123
|
+
if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
|
124
|
+
}
|
125
|
+
} else if (s[1] == 'f') {
|
126
|
+
if (s[2] == 'o') {
|
127
|
+
if (s[3] == 'r' && s[4] == ' ') {
|
128
|
+
AddMatch(id + 25 * n, l + 5, l, matches);
|
129
|
+
}
|
130
|
+
} else if (s[2] == 'r') {
|
131
|
+
if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
|
132
|
+
AddMatch(id + 37 * n, l + 6, l, matches);
|
133
|
+
}
|
134
|
+
}
|
135
|
+
} else if (s[1] == 'o') {
|
136
|
+
if (s[2] == 'f') {
|
137
|
+
if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
|
138
|
+
} else if (s[2] == 'n') {
|
139
|
+
if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
|
140
|
+
}
|
141
|
+
} else if (s[1] == 'n') {
|
142
|
+
if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
|
143
|
+
AddMatch(id + 80 * n, l + 5, l, matches);
|
144
|
+
}
|
145
|
+
} else if (s[1] == 't') {
|
146
|
+
if (s[2] == 'h') {
|
147
|
+
if (s[3] == 'e') {
|
148
|
+
if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
|
149
|
+
} else if (s[3] == 'a') {
|
150
|
+
if (s[4] == 't' && s[5] == ' ') {
|
151
|
+
AddMatch(id + 29 * n, l + 6, l, matches);
|
152
|
+
}
|
153
|
+
}
|
154
|
+
} else if (s[2] == 'o') {
|
155
|
+
if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
|
156
|
+
}
|
157
|
+
} else if (s[1] == 'w') {
|
158
|
+
if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
|
159
|
+
AddMatch(id + 35 * n, l + 6, l, matches);
|
160
|
+
}
|
161
|
+
}
|
162
|
+
} else if (s[0] == '"') {
|
163
|
+
AddMatch(id + 19 * n, l + 1, l, matches);
|
164
|
+
if (s[1] == '>') {
|
165
|
+
AddMatch(id + 21 * n, l + 2, l, matches);
|
166
|
+
}
|
167
|
+
} else if (s[0] == '.') {
|
168
|
+
AddMatch(id + 20 * n, l + 1, l, matches);
|
169
|
+
if (s[1] == ' ') {
|
170
|
+
AddMatch(id + 31 * n, l + 2, l, matches);
|
171
|
+
if (s[2] == 'T' && s[3] == 'h') {
|
172
|
+
if (s[4] == 'e') {
|
173
|
+
if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
|
174
|
+
} else if (s[4] == 'i') {
|
175
|
+
if (s[5] == 's' && s[6] == ' ') {
|
176
|
+
AddMatch(id + 75 * n, l + 7, l, matches);
|
177
|
+
}
|
178
|
+
}
|
179
|
+
}
|
180
|
+
}
|
181
|
+
} else if (s[0] == ',') {
|
182
|
+
AddMatch(id + 76 * n, l + 1, l, matches);
|
183
|
+
if (s[1] == ' ') {
|
184
|
+
AddMatch(id + 14 * n, l + 2, l, matches);
|
185
|
+
}
|
186
|
+
} else if (s[0] == '\n') {
|
187
|
+
AddMatch(id + 22 * n, l + 1, l, matches);
|
188
|
+
if (s[1] == '\t') {
|
189
|
+
AddMatch(id + 50 * n, l + 2, l, matches);
|
190
|
+
}
|
191
|
+
} else if (s[0] == ']') {
|
192
|
+
AddMatch(id + 24 * n, l + 1, l, matches);
|
193
|
+
} else if (s[0] == '\'') {
|
194
|
+
AddMatch(id + 36 * n, l + 1, l, matches);
|
195
|
+
} else if (s[0] == ':') {
|
196
|
+
AddMatch(id + 51 * n, l + 1, l, matches);
|
197
|
+
} else if (s[0] == '(') {
|
198
|
+
AddMatch(id + 57 * n, l + 1, l, matches);
|
199
|
+
} else if (s[0] == '=') {
|
200
|
+
if (s[1] == '"') {
|
201
|
+
AddMatch(id + 70 * n, l + 2, l, matches);
|
202
|
+
} else if (s[1] == '\'') {
|
203
|
+
AddMatch(id + 86 * n, l + 2, l, matches);
|
204
|
+
}
|
205
|
+
} else if (s[0] == 'a') {
|
206
|
+
if (s[1] == 'l' && s[2] == ' ') {
|
207
|
+
AddMatch(id + 84 * n, l + 3, l, matches);
|
208
|
+
}
|
209
|
+
} else if (s[0] == 'e') {
|
210
|
+
if (s[1] == 'd') {
|
211
|
+
if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
|
212
|
+
} else if (s[1] == 'r') {
|
213
|
+
if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
|
214
|
+
} else if (s[1] == 's') {
|
215
|
+
if (s[2] == 't' && s[3] == ' ') {
|
216
|
+
AddMatch(id + 95 * n, l + 4, l, matches);
|
217
|
+
}
|
218
|
+
}
|
219
|
+
} else if (s[0] == 'f') {
|
220
|
+
if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
|
221
|
+
AddMatch(id + 90 * n, l + 4, l, matches);
|
222
|
+
}
|
223
|
+
} else if (s[0] == 'i') {
|
224
|
+
if (s[1] == 'v') {
|
225
|
+
if (s[2] == 'e' && s[3] == ' ') {
|
226
|
+
AddMatch(id + 92 * n, l + 4, l, matches);
|
227
|
+
}
|
228
|
+
} else if (s[1] == 'z') {
|
229
|
+
if (s[2] == 'e' && s[3] == ' ') {
|
230
|
+
AddMatch(id + 100 * n, l + 4, l, matches);
|
231
|
+
}
|
232
|
+
}
|
233
|
+
} else if (s[0] == 'l') {
|
234
|
+
if (s[1] == 'e') {
|
235
|
+
if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
|
236
|
+
AddMatch(id + 93 * n, l + 5, l, matches);
|
237
|
+
}
|
238
|
+
} else if (s[1] == 'y') {
|
239
|
+
if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
|
240
|
+
}
|
241
|
+
} else if (s[0] == 'o') {
|
242
|
+
if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
|
243
|
+
AddMatch(id + 106 * n, l + 4, l, matches);
|
244
|
+
}
|
245
|
+
}
|
246
|
+
} else {
|
247
|
+
// Set t=0 for kUppercaseFirst and t=1 for kUppercaseAll transform.
|
248
|
+
const int t = w.transform - 10;
|
249
|
+
if (!IsMatch(w, data, max_length)) {
|
250
|
+
continue;
|
251
|
+
}
|
252
|
+
// Transform "" + kUppercase{First,All} + ""
|
253
|
+
AddMatch(id + (t ? 44 : 9) * n, l, l, matches);
|
254
|
+
found_match = true;
|
255
|
+
if (l + 1 >= max_length) {
|
256
|
+
continue;
|
257
|
+
}
|
258
|
+
// Transforms "" + kUppercase{First,All} + <suffix>
|
259
|
+
const uint8_t* s = &data[l];
|
260
|
+
if (s[0] == ' ') {
|
261
|
+
AddMatch(id + (t ? 68 : 4) * n, l + 1, l, matches);
|
262
|
+
} else if (s[0] == '"') {
|
263
|
+
AddMatch(id + (t ? 87 : 66) * n, l + 1, l, matches);
|
264
|
+
if (s[1] == '>') {
|
265
|
+
AddMatch(id + (t ? 97 : 69) * n, l + 2, l, matches);
|
266
|
+
}
|
267
|
+
} else if (s[0] == '.') {
|
268
|
+
AddMatch(id + (t ? 101 : 79) * n, l + 1, l, matches);
|
269
|
+
if (s[1] == ' ') {
|
270
|
+
AddMatch(id + (t ? 114 : 88) * n, l + 2, l, matches);
|
271
|
+
}
|
272
|
+
} else if (s[0] == ',') {
|
273
|
+
AddMatch(id + (t ? 112 : 99) * n, l + 1, l, matches);
|
274
|
+
if (s[1] == ' ') {
|
275
|
+
AddMatch(id + (t ? 107 : 58) * n, l + 2, l, matches);
|
276
|
+
}
|
277
|
+
} else if (s[0] == '\'') {
|
278
|
+
AddMatch(id + (t ? 94 : 74) * n, l + 1, l, matches);
|
279
|
+
} else if (s[0] == '(') {
|
280
|
+
AddMatch(id + (t ? 113 : 78) * n, l + 1, l, matches);
|
281
|
+
} else if (s[0] == '=') {
|
282
|
+
if (s[1] == '"') {
|
283
|
+
AddMatch(id + (t ? 105 : 104) * n, l + 2, l, matches);
|
284
|
+
} else if (s[1] == '\'') {
|
285
|
+
AddMatch(id + (t ? 116 : 108) * n, l + 2, l, matches);
|
286
|
+
}
|
287
|
+
}
|
288
|
+
}
|
289
|
+
}
|
290
|
+
}
|
291
|
+
// Transforms with prefixes " " and "."
|
292
|
+
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
|
293
|
+
bool is_space = (data[0] == ' ');
|
294
|
+
key = Hash(&data[1]);
|
295
|
+
bucket = kStaticDictionaryBuckets[key];
|
296
|
+
int num = bucket & 0xff;
|
297
|
+
int offset = bucket >> 8;
|
298
|
+
for (int i = 0; i < num; ++i) {
|
299
|
+
const DictWord w = kStaticDictionaryWords[offset + i];
|
300
|
+
const int l = w.len;
|
301
|
+
const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
|
302
|
+
const int id = w.idx;
|
303
|
+
if (w.transform == 0) {
|
304
|
+
if (!IsMatch(w, &data[1], max_length - 1)) {
|
305
|
+
continue;
|
306
|
+
}
|
307
|
+
// Transforms " " + kIdentity + "" and "." + kIdentity + ""
|
308
|
+
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
|
309
|
+
found_match = true;
|
310
|
+
if (l + 2 >= max_length) {
|
311
|
+
continue;
|
312
|
+
}
|
313
|
+
// Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
|
314
|
+
const uint8_t* s = &data[l + 1];
|
315
|
+
if (s[0] == ' ') {
|
316
|
+
AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
|
317
|
+
} else if (s[0] == '(') {
|
318
|
+
AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
|
319
|
+
} else if (is_space) {
|
320
|
+
if (s[0] == ',') {
|
321
|
+
AddMatch(id + 103 * n, l + 2, l, matches);
|
322
|
+
if (s[1] == ' ') {
|
323
|
+
AddMatch(id + 33 * n, l + 3, l, matches);
|
324
|
+
}
|
325
|
+
} else if (s[0] == '.') {
|
326
|
+
AddMatch(id + 71 * n, l + 2, l, matches);
|
327
|
+
if (s[1] == ' ') {
|
328
|
+
AddMatch(id + 52 * n, l + 3, l, matches);
|
329
|
+
}
|
330
|
+
} else if (s[0] == '=') {
|
331
|
+
if (s[1] == '"') {
|
332
|
+
AddMatch(id + 81 * n, l + 3, l, matches);
|
333
|
+
} else if (s[1] == '\'') {
|
334
|
+
AddMatch(id + 98 * n, l + 3, l, matches);
|
335
|
+
}
|
336
|
+
}
|
337
|
+
}
|
338
|
+
} else if (is_space) {
|
339
|
+
// Set t=0 for kUppercaseFirst and t=1 for kUppercaseAll transform.
|
340
|
+
const int t = w.transform - 10;
|
341
|
+
if (!IsMatch(w, &data[1], max_length - 1)) {
|
342
|
+
continue;
|
343
|
+
}
|
344
|
+
// Transforms " " + kUppercase{First,All} + ""
|
345
|
+
AddMatch(id + (t ? 85 : 30) * n, l + 1, l, matches);
|
346
|
+
found_match = true;
|
347
|
+
if (l + 2 >= max_length) {
|
348
|
+
continue;
|
349
|
+
}
|
350
|
+
// Transforms " " + kUppercase{First,All} + <suffix>
|
351
|
+
const uint8_t* s = &data[l + 1];
|
352
|
+
if (s[0] == ' ') {
|
353
|
+
AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches);
|
354
|
+
} else if (s[0] == ',') {
|
355
|
+
if (t == 0) {
|
356
|
+
AddMatch(id + 109 * n, l + 2, l, matches);
|
357
|
+
}
|
358
|
+
if (s[1] == ' ') {
|
359
|
+
AddMatch(id + (t ? 111 : 65) * n, l + 3, l, matches);
|
360
|
+
}
|
361
|
+
} else if (s[0] == '.') {
|
362
|
+
AddMatch(id + (t ? 115 : 96) * n, l + 2, l, matches);
|
363
|
+
if (s[1] == ' ') {
|
364
|
+
AddMatch(id + (t ? 117 : 91) * n, l + 3, l, matches);
|
365
|
+
}
|
366
|
+
} else if (s[0] == '=') {
|
367
|
+
if (s[1] == '"') {
|
368
|
+
AddMatch(id + (t ? 110 : 118) * n, l + 3, l, matches);
|
369
|
+
} else if (s[1] == '\'') {
|
370
|
+
AddMatch(id + (t ? 119 : 120) * n, l + 3, l, matches);
|
371
|
+
}
|
372
|
+
}
|
373
|
+
}
|
374
|
+
}
|
375
|
+
}
|
376
|
+
if (max_length >= 6) {
|
377
|
+
// Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0"
|
378
|
+
if ((data[1] == ' ' &&
|
379
|
+
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
|
380
|
+
(data[0] == 0xc2 && data[1] == 0xa0)) {
|
381
|
+
key = Hash(&data[2]);
|
382
|
+
bucket = kStaticDictionaryBuckets[key];
|
383
|
+
int num = bucket & 0xff;
|
384
|
+
int offset = bucket >> 8;
|
385
|
+
for (int i = 0; i < num; ++i) {
|
386
|
+
const DictWord w = kStaticDictionaryWords[offset + i];
|
387
|
+
const int l = w.len;
|
388
|
+
const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
|
389
|
+
const int id = w.idx;
|
390
|
+
if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {
|
391
|
+
if (data[0] == 0xc2) {
|
392
|
+
AddMatch(id + 102 * n, l + 2, l, matches);
|
393
|
+
found_match = true;
|
394
|
+
} else if (l + 2 < max_length && data[l + 2] == ' ') {
|
395
|
+
int t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
|
396
|
+
AddMatch(id + t * n, l + 3, l, matches);
|
397
|
+
found_match = true;
|
398
|
+
}
|
399
|
+
}
|
400
|
+
}
|
401
|
+
}
|
402
|
+
}
|
403
|
+
if (max_length >= 9) {
|
404
|
+
// Transforms with prefixes " the " and ".com/"
|
405
|
+
if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
|
406
|
+
data[3] == 'e' && data[4] == ' ') ||
|
407
|
+
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
|
408
|
+
data[3] == 'm' && data[4] == '/')) {
|
409
|
+
key = Hash(&data[5]);
|
410
|
+
bucket = kStaticDictionaryBuckets[key];
|
411
|
+
int num = bucket & 0xff;
|
412
|
+
int offset = bucket >> 8;
|
413
|
+
for (int i = 0; i < num; ++i) {
|
414
|
+
const DictWord w = kStaticDictionaryWords[offset + i];
|
415
|
+
const int l = w.len;
|
416
|
+
const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
|
417
|
+
const int id = w.idx;
|
418
|
+
if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {
|
419
|
+
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
|
420
|
+
found_match = true;
|
421
|
+
if (l + 5 < max_length) {
|
422
|
+
const uint8_t* s = &data[l + 5];
|
423
|
+
if (data[0] == ' ') {
|
424
|
+
if (l + 8 < max_length &&
|
425
|
+
s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
|
426
|
+
AddMatch(id + 62 * n, l + 9, l, matches);
|
427
|
+
if (l + 12 < max_length &&
|
428
|
+
s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
|
429
|
+
AddMatch(id + 73 * n, l + 13, l, matches);
|
430
|
+
}
|
431
|
+
}
|
432
|
+
}
|
433
|
+
}
|
434
|
+
}
|
435
|
+
}
|
436
|
+
}
|
437
|
+
}
|
438
|
+
return found_match;
|
439
|
+
}
|
440
|
+
|
441
|
+
} // namespace brotli
|