extbrotli 0.0.1.PROTOTYPE
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +28 -0
- data/README.md +67 -0
- data/Rakefile +158 -0
- data/contrib/brotli/LICENSE +202 -0
- data/contrib/brotli/README.md +18 -0
- data/contrib/brotli/dec/bit_reader.c +55 -0
- data/contrib/brotli/dec/bit_reader.h +256 -0
- data/contrib/brotli/dec/context.h +260 -0
- data/contrib/brotli/dec/decode.c +1573 -0
- data/contrib/brotli/dec/decode.h +160 -0
- data/contrib/brotli/dec/dictionary.h +9494 -0
- data/contrib/brotli/dec/huffman.c +325 -0
- data/contrib/brotli/dec/huffman.h +77 -0
- data/contrib/brotli/dec/port.h +148 -0
- data/contrib/brotli/dec/prefix.h +756 -0
- data/contrib/brotli/dec/state.c +149 -0
- data/contrib/brotli/dec/state.h +185 -0
- data/contrib/brotli/dec/streams.c +99 -0
- data/contrib/brotli/dec/streams.h +100 -0
- data/contrib/brotli/dec/transform.h +315 -0
- data/contrib/brotli/dec/types.h +36 -0
- data/contrib/brotli/enc/backward_references.cc +769 -0
- data/contrib/brotli/enc/backward_references.h +50 -0
- data/contrib/brotli/enc/bit_cost.h +147 -0
- data/contrib/brotli/enc/block_splitter.cc +418 -0
- data/contrib/brotli/enc/block_splitter.h +78 -0
- data/contrib/brotli/enc/brotli_bit_stream.cc +884 -0
- data/contrib/brotli/enc/brotli_bit_stream.h +149 -0
- data/contrib/brotli/enc/cluster.h +290 -0
- data/contrib/brotli/enc/command.h +140 -0
- data/contrib/brotli/enc/context.h +185 -0
- data/contrib/brotli/enc/dictionary.h +9485 -0
- data/contrib/brotli/enc/dictionary_hash.h +4125 -0
- data/contrib/brotli/enc/encode.cc +715 -0
- data/contrib/brotli/enc/encode.h +196 -0
- data/contrib/brotli/enc/encode_parallel.cc +354 -0
- data/contrib/brotli/enc/encode_parallel.h +37 -0
- data/contrib/brotli/enc/entropy_encode.cc +492 -0
- data/contrib/brotli/enc/entropy_encode.h +88 -0
- data/contrib/brotli/enc/fast_log.h +179 -0
- data/contrib/brotli/enc/find_match_length.h +87 -0
- data/contrib/brotli/enc/hash.h +686 -0
- data/contrib/brotli/enc/histogram.cc +76 -0
- data/contrib/brotli/enc/histogram.h +100 -0
- data/contrib/brotli/enc/literal_cost.cc +172 -0
- data/contrib/brotli/enc/literal_cost.h +38 -0
- data/contrib/brotli/enc/metablock.cc +544 -0
- data/contrib/brotli/enc/metablock.h +88 -0
- data/contrib/brotli/enc/port.h +151 -0
- data/contrib/brotli/enc/prefix.h +85 -0
- data/contrib/brotli/enc/ringbuffer.h +108 -0
- data/contrib/brotli/enc/static_dict.cc +441 -0
- data/contrib/brotli/enc/static_dict.h +40 -0
- data/contrib/brotli/enc/static_dict_lut.h +12063 -0
- data/contrib/brotli/enc/streams.cc +127 -0
- data/contrib/brotli/enc/streams.h +129 -0
- data/contrib/brotli/enc/transform.h +250 -0
- data/contrib/brotli/enc/write_bits.h +91 -0
- data/ext/extbrotli.cc +24 -0
- data/ext/extbrotli.h +73 -0
- data/ext/extconf.rb +35 -0
- data/ext/lldecoder.c +220 -0
- data/ext/llencoder.cc +433 -0
- data/gemstub.rb +21 -0
- data/lib/extbrotli.rb +243 -0
- data/lib/extbrotli/version.rb +3 -0
- metadata +140 -0
@@ -0,0 +1,88 @@
|
|
1
|
+
// Copyright 2015 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Algorithms for distributing the literals and commands of a metablock between
|
16
|
+
// block types and contexts.
|
17
|
+
|
18
|
+
#ifndef BROTLI_ENC_METABLOCK_H_
|
19
|
+
#define BROTLI_ENC_METABLOCK_H_
|
20
|
+
|
21
|
+
#include <vector>
|
22
|
+
|
23
|
+
#include "./command.h"
|
24
|
+
#include "./histogram.h"
|
25
|
+
|
26
|
+
namespace brotli {
|
27
|
+
|
28
|
+
// Holds a type count plus two parallel-looking integer vectors.
// NOTE(review): presumably types[i]/lengths[i] describe the i-th block of a
// split; confirm against the block splitter implementation.
struct BlockSplit {
  // A fresh split has zero types and empty type/length vectors.
  BlockSplit()
      : num_types(0),
        types(),
        lengths() {}

  int num_types;
  std::vector<int> types;
  std::vector<int> lengths;
};
|
35
|
+
|
36
|
+
struct MetaBlockSplit {
|
37
|
+
BlockSplit literal_split;
|
38
|
+
BlockSplit command_split;
|
39
|
+
BlockSplit distance_split;
|
40
|
+
std::vector<int> literal_context_map;
|
41
|
+
std::vector<int> distance_context_map;
|
42
|
+
std::vector<HistogramLiteral> literal_histograms;
|
43
|
+
std::vector<HistogramCommand> command_histograms;
|
44
|
+
std::vector<HistogramDistance> distance_histograms;
|
45
|
+
};
|
46
|
+
|
47
|
+
// Uses the slow shortest-path block splitter and does context clustering.
|
48
|
+
void BuildMetaBlock(const uint8_t* ringbuffer,
|
49
|
+
const size_t pos,
|
50
|
+
const size_t mask,
|
51
|
+
uint8_t prev_byte,
|
52
|
+
uint8_t prev_byte2,
|
53
|
+
const Command* cmds,
|
54
|
+
size_t num_commands,
|
55
|
+
int literal_context_mode,
|
56
|
+
MetaBlockSplit* mb);
|
57
|
+
|
58
|
+
// Uses a fast greedy block splitter that tries to merge current block with the
|
59
|
+
// last or the second last block and does not do any context modeling.
|
60
|
+
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
|
61
|
+
size_t pos,
|
62
|
+
size_t mask,
|
63
|
+
const Command *commands,
|
64
|
+
size_t n_commands,
|
65
|
+
MetaBlockSplit* mb);
|
66
|
+
|
67
|
+
// Uses a fast greedy block splitter that tries to merge current block with the
|
68
|
+
// last or the second last block and uses a static context clustering which
|
69
|
+
// is the same for all block types.
|
70
|
+
void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
|
71
|
+
size_t pos,
|
72
|
+
size_t mask,
|
73
|
+
uint8_t prev_byte,
|
74
|
+
uint8_t prev_byte2,
|
75
|
+
int literal_context_mode,
|
76
|
+
int num_contexts,
|
77
|
+
const int* static_context_map,
|
78
|
+
const Command *commands,
|
79
|
+
size_t n_commands,
|
80
|
+
MetaBlockSplit* mb);
|
81
|
+
|
82
|
+
void OptimizeHistograms(int num_direct_distance_codes,
|
83
|
+
int distance_postfix_bits,
|
84
|
+
MetaBlockSplit* mb);
|
85
|
+
|
86
|
+
} // namespace brotli
|
87
|
+
|
88
|
+
#endif // BROTLI_ENC_METABLOCK_H_
|
@@ -0,0 +1,151 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Macros for endianness, branch prediction and unaligned loads and stores.
|
16
|
+
|
17
|
+
#ifndef BROTLI_ENC_PORT_H_
|
18
|
+
#define BROTLI_ENC_PORT_H_
|
19
|
+
|
20
|
+
#include <stdint.h>
#include <string.h>
|
21
|
+
|
22
|
+
// Pull in the platform's byte-order definitions where a known OS macro is
// set by the build.
#if defined(OS_LINUX) || defined(OS_CYGWIN)
#include <endian.h>
#elif defined(OS_FREEBSD)
#include <machine/endian.h>
#elif defined(OS_MACOSX)
#include <machine/endian.h>
/* Mirror the Linux spelling of the byte-order macros. */
#define __BYTE_ORDER BYTE_ORDER
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#define __BIG_ENDIAN BIG_ENDIAN
#endif

// Derive IS_LITTLE_ENDIAN / IS_BIG_ENDIAN. The endian.h-style macros are
// preferred when present; otherwise fall back to the compiler-provided
// __LITTLE_ENDIAN__ / __BIG_ENDIAN__ symbols.
#if defined(__BYTE_ORDER)

#if __BYTE_ORDER == __LITTLE_ENDIAN
#define IS_LITTLE_ENDIAN
#endif

#if __BYTE_ORDER == __BIG_ENDIAN
#define IS_BIG_ENDIAN
#endif

#else  // !defined(__BYTE_ORDER)

#if defined(__LITTLE_ENDIAN__)
#define IS_LITTLE_ENDIAN
#elif defined(__BIG_ENDIAN__)
#define IS_BIG_ENDIAN
#endif

#endif  // __BYTE_ORDER

// 64-bit x86 Windows builds get the little-endian optimization as well.
#if (defined(_WIN32) || defined(_WIN64)) && defined(_M_X64)
#define IS_LITTLE_ENDIAN
#endif
|
59
|
+
|
60
|
+
/* Compatibility with non-clang compilers. */
|
61
|
+
/* Compatibility with non-clang compilers. */
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

// Branch-prediction hints.
//   PREDICT_FALSE(x): the condition x is expected to be false.
//   PREDICT_TRUE(x):  the condition x is expected to be true.
// With __builtin_expect available, both macros normalize their argument with
// `!!' before passing it on: __builtin_expect takes a `long', so a raw
// pointer argument would not compile and a wider integer could be truncated
// on targets where `long' is 32 bits. Without the builtin the macros are
// plain pass-throughs.
#if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ > 95) || \
    (defined(__llvm__) && __has_builtin(__builtin_expect))
#define PREDICT_FALSE(x) (__builtin_expect(!!(x), 0))
#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
#define PREDICT_FALSE(x) (x)
#define PREDICT_TRUE(x) (x)
#endif
|
73
|
+
|
74
|
+
// Portable handling of unaligned loads, stores, and copies.
|
75
|
+
// On some platforms, like ARM, the copy functions can be more efficient
|
76
|
+
// than a load and a store.
|
77
|
+
|
78
|
+
#if defined(ARCH_PIII) || defined(ARCH_ATHLON) || \
    defined(ARCH_K8) || defined(_ARCH_PPC)

// x86, x86-64 and modern PowerPC handle unaligned integer loads and stores
// in hardware, so the accesses are done directly through a cast pointer.
// Note that the FPU still routes unaligned accesses to a trap handler.

#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p))

#define BROTLI_UNALIGNED_STORE32(_p, _val) \
  (*reinterpret_cast<uint32_t *>(_p) = (_val))
#define BROTLI_UNALIGNED_STORE64(_p, _val) \
  (*reinterpret_cast<uint64_t *>(_p) = (_val))

#elif defined(__arm__) && \
      !defined(__ARM_ARCH_5__) && \
      !defined(__ARM_ARCH_5T__) && \
      !defined(__ARM_ARCH_5TE__) && \
      !defined(__ARM_ARCH_5TEJ__) && \
      !defined(__ARM_ARCH_6__) && \
      !defined(__ARM_ARCH_6J__) && \
      !defined(__ARM_ARCH_6K__) && \
      !defined(__ARM_ARCH_6Z__) && \
      !defined(__ARM_ARCH_6ZK__) && \
      !defined(__ARM_ARCH_6T2__)

// ARMv7 and newer: native unaligned access exists for 16-bit and 32-bit
// values only, so 32-bit accesses are direct and 64-bit accesses go through
// memcpy. Older ARM versions are excluded above because they either raise a
// fatal signal, read and rotate the words, or trap into the kernel (slow).

#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
  (*reinterpret_cast<uint32_t *>(_p) = (_val))

// Reads 8 bytes starting at p, which may be unaligned.
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
  uint64_t value;
  memcpy(&value, p, sizeof(value));
  return value;
}

// Writes the 8 bytes of v starting at p, which may be unaligned.
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
  memcpy(p, &v, sizeof(v));
}

#else

// Fallback for architectures without (known) support for unaligned loads
// and stores: every access is a memcpy to or from a local.

// Reads 4 bytes starting at p, which may be unaligned.
inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
  uint32_t value;
  memcpy(&value, p, sizeof(value));
  return value;
}

// Writes the 4 bytes of v starting at p, which may be unaligned.
inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
  memcpy(p, &v, sizeof(v));
}

// Reads 8 bytes starting at p, which may be unaligned.
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
  uint64_t value;
  memcpy(&value, p, sizeof(value));
  return value;
}

// Writes the 8 bytes of v starting at p, which may be unaligned.
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
  memcpy(p, &v, sizeof(v));
}

#endif
|
150
|
+
|
151
|
+
#endif // BROTLI_ENC_PORT_H_
|
@@ -0,0 +1,85 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Functions for encoding of integers into prefix codes the amount of extra
|
16
|
+
// bits, and the actual values of the extra bits.
|
17
|
+
|
18
|
+
#ifndef BROTLI_ENC_PREFIX_H_
|
19
|
+
#define BROTLI_ENC_PREFIX_H_
|
20
|
+
|
21
|
+
#include <stdint.h>
|
22
|
+
#include "./fast_log.h"
|
23
|
+
|
24
|
+
namespace brotli {
|
25
|
+
|
26
|
+
// Alphabet sizes of the prefix codes used by the encoder.
static const int kNumInsertLenPrefixes = 24;
static const int kNumCopyLenPrefixes = 24;
static const int kNumCommandPrefixes = 704;
static const int kNumBlockLenPrefixes = 26;
static const int kNumDistanceShortCodes = 16;
static const int kNumDistancePrefixes = 520;

// Represents the range of values belonging to a prefix code:
// [offset, offset + 2^nbits)
struct PrefixCodeRange {
  int offset;
  int nbits;
};

// Block-length prefix codes, sorted by increasing offset. Consecutive ranges
// are contiguous: each offset equals the previous offset + 2^nbits.
static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
  {   1,  2}, {    5,  2}, {   9,  2}, {  13,  2},
  {  17,  3}, {   25,  3}, {  33,  3}, {  41,  3},
  {  49,  4}, {   65,  4}, {  81,  4}, {  97,  4},
  { 113,  5}, {  145,  5}, { 177,  5}, { 209,  5},
  { 241,  6}, {  305,  6}, { 369,  7}, { 497,  8},
  { 753,  9}, { 1265, 10}, {2289, 11}, {4337, 12},
  {8433, 13}, {16625, 24}
};

// Maps a block length to its prefix code.
//
//   len      block length to encode; lengths below the first table offset
//            (1) map to code 0 with a negative *extra.
//   code     out: index of the last table entry whose offset is <= len.
//   n_extra  out: number of extra bits of that entry.
//   extra    out: len minus the entry's offset.
//
// The scan is bounded by kNumBlockLenPrefixes rather than a literal, so the
// loop cannot run past the table if its size ever changes.
inline void GetBlockLengthPrefixCode(int len,
                                     int* code, int* n_extra, int* extra) {
  int index = 0;
  while (index + 1 < kNumBlockLenPrefixes &&
         len >= kBlockLengthPrefixCode[index + 1].offset) {
    ++index;
  }
  const PrefixCodeRange& range = kBlockLengthPrefixCode[index];
  *code = index;
  *n_extra = range.nbits;
  *extra = len - range.offset;
}
|
59
|
+
|
60
|
+
inline void PrefixEncodeCopyDistance(int distance_code,
|
61
|
+
int num_direct_codes,
|
62
|
+
int postfix_bits,
|
63
|
+
uint16_t* code,
|
64
|
+
uint32_t* extra_bits) {
|
65
|
+
if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
|
66
|
+
*code = distance_code;
|
67
|
+
*extra_bits = 0;
|
68
|
+
return;
|
69
|
+
}
|
70
|
+
distance_code -= kNumDistanceShortCodes + num_direct_codes;
|
71
|
+
distance_code += (1 << (postfix_bits + 2));
|
72
|
+
int bucket = Log2Floor(distance_code) - 1;
|
73
|
+
int postfix_mask = (1 << postfix_bits) - 1;
|
74
|
+
int postfix = distance_code & postfix_mask;
|
75
|
+
int prefix = (distance_code >> bucket) & 1;
|
76
|
+
int offset = (2 + prefix) << bucket;
|
77
|
+
int nbits = bucket - postfix_bits;
|
78
|
+
*code = kNumDistanceShortCodes + num_direct_codes +
|
79
|
+
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix;
|
80
|
+
*extra_bits = (nbits << 24) | ((distance_code - offset) >> postfix_bits);
|
81
|
+
}
|
82
|
+
|
83
|
+
} // namespace brotli
|
84
|
+
|
85
|
+
#endif // BROTLI_ENC_PREFIX_H_
|
@@ -0,0 +1,108 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
// Sliding window over the input data.
|
16
|
+
|
17
|
+
#ifndef BROTLI_ENC_RINGBUFFER_H_
|
18
|
+
#define BROTLI_ENC_RINGBUFFER_H_
|
19
|
+
|
20
|
+
#include <stddef.h>
|
21
|
+
#include <stdint.h>
|
22
|
+
|
23
|
+
#include "./port.h"
|
24
|
+
|
25
|
+
namespace brotli {
|
26
|
+
|
27
|
+
// A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
|
28
|
+
// data in a circular manner: writing a byte writes it to
|
29
|
+
// `position() % (1 << window_bits)'. For convenience, the RingBuffer array
|
30
|
+
// contains another copy of the first `1 << tail_bits' bytes:
|
31
|
+
// buffer_[i] == buffer_[i + (1 << window_bits)] if i < (1 << tail_bits).
|
32
|
+
class RingBuffer {
|
33
|
+
public:
|
34
|
+
RingBuffer(int window_bits, int tail_bits)
|
35
|
+
: window_bits_(window_bits),
|
36
|
+
mask_((1 << window_bits) - 1),
|
37
|
+
tail_size_(1 << tail_bits),
|
38
|
+
pos_(0) {
|
39
|
+
static const int kSlackForEightByteHashingEverywhere = 7;
|
40
|
+
const int buflen = (1 << window_bits_) + tail_size_;
|
41
|
+
buffer_ = new uint8_t[buflen + kSlackForEightByteHashingEverywhere];
|
42
|
+
for (int i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
|
43
|
+
buffer_[buflen + i] = 0;
|
44
|
+
}
|
45
|
+
}
|
46
|
+
~RingBuffer() {
|
47
|
+
delete [] buffer_;
|
48
|
+
}
|
49
|
+
|
50
|
+
// Push bytes into the ring buffer.
|
51
|
+
void Write(const uint8_t *bytes, size_t n) {
|
52
|
+
const size_t masked_pos = pos_ & mask_;
|
53
|
+
// The length of the writes is limited so that we do not need to worry
|
54
|
+
// about a write
|
55
|
+
WriteTail(bytes, n);
|
56
|
+
if (PREDICT_TRUE(masked_pos + n <= (1 << window_bits_))) {
|
57
|
+
// A single write fits.
|
58
|
+
memcpy(&buffer_[masked_pos], bytes, n);
|
59
|
+
} else {
|
60
|
+
// Split into two writes.
|
61
|
+
// Copy into the end of the buffer, including the tail buffer.
|
62
|
+
memcpy(&buffer_[masked_pos], bytes,
|
63
|
+
std::min(n, ((1 << window_bits_) + tail_size_) - masked_pos));
|
64
|
+
// Copy into the begining of the buffer
|
65
|
+
memcpy(&buffer_[0], bytes + ((1 << window_bits_) - masked_pos),
|
66
|
+
n - ((1 << window_bits_) - masked_pos));
|
67
|
+
}
|
68
|
+
pos_ += n;
|
69
|
+
}
|
70
|
+
|
71
|
+
void Reset() {
|
72
|
+
pos_ = 0;
|
73
|
+
}
|
74
|
+
|
75
|
+
// Logical cursor position in the ring buffer.
|
76
|
+
size_t position() const { return pos_; }
|
77
|
+
|
78
|
+
// Bit mask for getting the physical position for a logical position.
|
79
|
+
size_t mask() const { return mask_; }
|
80
|
+
|
81
|
+
uint8_t *start() { return &buffer_[0]; }
|
82
|
+
const uint8_t *start() const { return &buffer_[0]; }
|
83
|
+
|
84
|
+
private:
|
85
|
+
void WriteTail(const uint8_t *bytes, size_t n) {
|
86
|
+
const size_t masked_pos = pos_ & mask_;
|
87
|
+
if (PREDICT_FALSE(masked_pos < tail_size_)) {
|
88
|
+
// Just fill the tail buffer with the beginning data.
|
89
|
+
const size_t p = (1 << window_bits_) + masked_pos;
|
90
|
+
memcpy(&buffer_[p], bytes, std::min(n, tail_size_ - masked_pos));
|
91
|
+
}
|
92
|
+
}
|
93
|
+
|
94
|
+
// Size of the ringbuffer is (1 << window_bits) + tail_size_.
|
95
|
+
const int window_bits_;
|
96
|
+
const size_t mask_;
|
97
|
+
const size_t tail_size_;
|
98
|
+
|
99
|
+
// Position to write in the ring buffer.
|
100
|
+
size_t pos_;
|
101
|
+
// The actual ring buffer containing the data and the copy of the beginning
|
102
|
+
// as a tail.
|
103
|
+
uint8_t *buffer_;
|
104
|
+
};
|
105
|
+
|
106
|
+
} // namespace brotli
|
107
|
+
|
108
|
+
#endif // BROTLI_ENC_RINGBUFFER_H_
|
@@ -0,0 +1,441 @@
|
|
1
|
+
#include "./static_dict.h"
|
2
|
+
|
3
|
+
#include <algorithm>
|
4
|
+
|
5
|
+
#include "./dictionary.h"
|
6
|
+
#include "./find_match_length.h"
|
7
|
+
#include "./static_dict_lut.h"
|
8
|
+
#include "./transform.h"
|
9
|
+
|
10
|
+
namespace brotli {
|
11
|
+
|
12
|
+
inline uint32_t Hash(const uint8_t *data) {
|
13
|
+
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
|
14
|
+
// The higher bits contain more mixture from the multiplication,
|
15
|
+
// so we take our results from there.
|
16
|
+
return h >> (32 - kDictNumBits);
|
17
|
+
}
|
18
|
+
|
19
|
+
// Records a match of length `len' in matches[len], keeping the smaller of
// the existing entry and the packed value (distance << 5) + len_code.
inline void AddMatch(int distance, int len, int len_code, int* matches) {
  const int packed = (distance << 5) + len_code;
  int& slot = matches[len];
  if (packed < slot) {
    slot = packed;
  }
}
|
22
|
+
|
23
|
+
inline int DictMatchLength(const uint8_t* data, int id, int len, int maxlen) {
|
24
|
+
const int offset = kBrotliDictionaryOffsetsByLength[len] + len * id;
|
25
|
+
return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data,
|
26
|
+
std::min(len, maxlen));
|
27
|
+
}
|
28
|
+
|
29
|
+
// Returns true if `data' starts with the dictionary word described by `w',
// after applying w.transform: 0 compares the base word, 10 is the uppercase
// first transform, and any other value is handled as uppercase all.
// Words longer than max_length never match.
inline bool IsMatch(DictWord w, const uint8_t* data, int max_length) {
  if (w.len > max_length) {
    return false;
  }
  const int word_start =
      kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx;
  const uint8_t* word = &kBrotliDictionary[word_start];

  if (w.transform == 0) {
    // Match against base dictionary word.
    return FindMatchLengthWithLimit(word, data, w.len) == w.len;
  }

  if (w.transform == 10) {
    // Match against uppercase first transform.
    // Note that there are only ASCII uppercase words in the lookup table.
    if (word[0] < 'a' || word[0] > 'z') {
      return false;
    }
    // XOR with 32 flips the ASCII case bit of the first letter.
    if ((word[0] ^ 32) != data[0]) {
      return false;
    }
    return FindMatchLengthWithLimit(&word[1], &data[1], w.len - 1) ==
           w.len - 1;
  }

  // Match against uppercase all transform.
  // Note that there are only ASCII uppercase words in the lookup table.
  for (int pos = 0; pos < w.len; ++pos) {
    const bool is_lower = word[pos] >= 'a' && word[pos] <= 'z';
    const int expected = is_lower ? (word[pos] ^ 32) : word[pos];
    if (expected != data[pos]) {
      return false;
    }
  }
  return true;
}
|
56
|
+
|
57
|
+
bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
58
|
+
int min_length,
|
59
|
+
int max_length,
|
60
|
+
int* matches) {
|
61
|
+
bool found_match = false;
|
62
|
+
uint32_t key = Hash(data);
|
63
|
+
uint32_t bucket = kStaticDictionaryBuckets[key];
|
64
|
+
if (bucket != 0) {
|
65
|
+
int num = bucket & 0xff;
|
66
|
+
int offset = bucket >> 8;
|
67
|
+
for (int i = 0; i < num; ++i) {
|
68
|
+
const DictWord w = kStaticDictionaryWords[offset + i];
|
69
|
+
const int l = w.len;
|
70
|
+
const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
|
71
|
+
const int id = w.idx;
|
72
|
+
if (w.transform == 0) {
|
73
|
+
const int matchlen = DictMatchLength(data, id, l, max_length);
|
74
|
+
// Transform "" + kIdentity + ""
|
75
|
+
if (matchlen == l) {
|
76
|
+
AddMatch(id, l, l, matches);
|
77
|
+
found_match = true;
|
78
|
+
}
|
79
|
+
// Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing "
|
80
|
+
if (matchlen >= l - 1) {
|
81
|
+
AddMatch(id + 12 * n, l - 1, l, matches);
|
82
|
+
if (l + 2 < max_length &&
|
83
|
+
data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
|
84
|
+
data[l + 2] == ' ') {
|
85
|
+
AddMatch(id + 49 * n, l + 3, l, matches);
|
86
|
+
}
|
87
|
+
found_match = true;
|
88
|
+
}
|
89
|
+
// Transform "" + kOmitLastN + "" (N = 2 .. 9)
|
90
|
+
int minlen = std::max<int>(min_length, l - 9);
|
91
|
+
int maxlen = std::min<int>(matchlen, l - 2);
|
92
|
+
for (int len = minlen; len <= maxlen; ++len) {
|
93
|
+
AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);
|
94
|
+
found_match = true;
|
95
|
+
}
|
96
|
+
if (matchlen < l || l + 6 >= max_length) {
|
97
|
+
continue;
|
98
|
+
}
|
99
|
+
const uint8_t* s = &data[l];
|
100
|
+
// Transforms "" + kIdentity + <suffix>
|
101
|
+
if (s[0] == ' ') {
|
102
|
+
AddMatch(id + n, l + 1, l, matches);
|
103
|
+
if (s[1] == 'a') {
|
104
|
+
if (s[2] == ' ') {
|
105
|
+
AddMatch(id + 28 * n, l + 3, l, matches);
|
106
|
+
} else if (s[2] == 's') {
|
107
|
+
if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
|
108
|
+
} else if (s[2] == 't') {
|
109
|
+
if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
|
110
|
+
} else if (s[2] == 'n') {
|
111
|
+
if (s[3] == 'd' && s[4] == ' ') {
|
112
|
+
AddMatch(id + 10 * n, l + 5, l, matches);
|
113
|
+
}
|
114
|
+
}
|
115
|
+
} else if (s[1] == 'b') {
|
116
|
+
if (s[2] == 'y' && s[3] == ' ') {
|
117
|
+
AddMatch(id + 38 * n, l + 4, l, matches);
|
118
|
+
}
|
119
|
+
} else if (s[1] == 'i') {
|
120
|
+
if (s[2] == 'n') {
|
121
|
+
if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
|
122
|
+
} else if (s[2] == 's') {
|
123
|
+
if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
|
124
|
+
}
|
125
|
+
} else if (s[1] == 'f') {
|
126
|
+
if (s[2] == 'o') {
|
127
|
+
if (s[3] == 'r' && s[4] == ' ') {
|
128
|
+
AddMatch(id + 25 * n, l + 5, l, matches);
|
129
|
+
}
|
130
|
+
} else if (s[2] == 'r') {
|
131
|
+
if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
|
132
|
+
AddMatch(id + 37 * n, l + 6, l, matches);
|
133
|
+
}
|
134
|
+
}
|
135
|
+
} else if (s[1] == 'o') {
|
136
|
+
if (s[2] == 'f') {
|
137
|
+
if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
|
138
|
+
} else if (s[2] == 'n') {
|
139
|
+
if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
|
140
|
+
}
|
141
|
+
} else if (s[1] == 'n') {
|
142
|
+
if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
|
143
|
+
AddMatch(id + 80 * n, l + 5, l, matches);
|
144
|
+
}
|
145
|
+
} else if (s[1] == 't') {
|
146
|
+
if (s[2] == 'h') {
|
147
|
+
if (s[3] == 'e') {
|
148
|
+
if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
|
149
|
+
} else if (s[3] == 'a') {
|
150
|
+
if (s[4] == 't' && s[5] == ' ') {
|
151
|
+
AddMatch(id + 29 * n, l + 6, l, matches);
|
152
|
+
}
|
153
|
+
}
|
154
|
+
} else if (s[2] == 'o') {
|
155
|
+
if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
|
156
|
+
}
|
157
|
+
} else if (s[1] == 'w') {
|
158
|
+
if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
|
159
|
+
AddMatch(id + 35 * n, l + 6, l, matches);
|
160
|
+
}
|
161
|
+
}
|
162
|
+
} else if (s[0] == '"') {
|
163
|
+
AddMatch(id + 19 * n, l + 1, l, matches);
|
164
|
+
if (s[1] == '>') {
|
165
|
+
AddMatch(id + 21 * n, l + 2, l, matches);
|
166
|
+
}
|
167
|
+
} else if (s[0] == '.') {
|
168
|
+
AddMatch(id + 20 * n, l + 1, l, matches);
|
169
|
+
if (s[1] == ' ') {
|
170
|
+
AddMatch(id + 31 * n, l + 2, l, matches);
|
171
|
+
if (s[2] == 'T' && s[3] == 'h') {
|
172
|
+
if (s[4] == 'e') {
|
173
|
+
if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
|
174
|
+
} else if (s[4] == 'i') {
|
175
|
+
if (s[5] == 's' && s[6] == ' ') {
|
176
|
+
AddMatch(id + 75 * n, l + 7, l, matches);
|
177
|
+
}
|
178
|
+
}
|
179
|
+
}
|
180
|
+
}
|
181
|
+
} else if (s[0] == ',') {
|
182
|
+
AddMatch(id + 76 * n, l + 1, l, matches);
|
183
|
+
if (s[1] == ' ') {
|
184
|
+
AddMatch(id + 14 * n, l + 2, l, matches);
|
185
|
+
}
|
186
|
+
} else if (s[0] == '\n') {
|
187
|
+
AddMatch(id + 22 * n, l + 1, l, matches);
|
188
|
+
if (s[1] == '\t') {
|
189
|
+
AddMatch(id + 50 * n, l + 2, l, matches);
|
190
|
+
}
|
191
|
+
} else if (s[0] == ']') {
|
192
|
+
AddMatch(id + 24 * n, l + 1, l, matches);
|
193
|
+
} else if (s[0] == '\'') {
|
194
|
+
AddMatch(id + 36 * n, l + 1, l, matches);
|
195
|
+
} else if (s[0] == ':') {
|
196
|
+
AddMatch(id + 51 * n, l + 1, l, matches);
|
197
|
+
} else if (s[0] == '(') {
|
198
|
+
AddMatch(id + 57 * n, l + 1, l, matches);
|
199
|
+
} else if (s[0] == '=') {
|
200
|
+
if (s[1] == '"') {
|
201
|
+
AddMatch(id + 70 * n, l + 2, l, matches);
|
202
|
+
} else if (s[1] == '\'') {
|
203
|
+
AddMatch(id + 86 * n, l + 2, l, matches);
|
204
|
+
}
|
205
|
+
} else if (s[0] == 'a') {
|
206
|
+
if (s[1] == 'l' && s[2] == ' ') {
|
207
|
+
AddMatch(id + 84 * n, l + 3, l, matches);
|
208
|
+
}
|
209
|
+
} else if (s[0] == 'e') {
|
210
|
+
if (s[1] == 'd') {
|
211
|
+
if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
|
212
|
+
} else if (s[1] == 'r') {
|
213
|
+
if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
|
214
|
+
} else if (s[1] == 's') {
|
215
|
+
if (s[2] == 't' && s[3] == ' ') {
|
216
|
+
AddMatch(id + 95 * n, l + 4, l, matches);
|
217
|
+
}
|
218
|
+
}
|
219
|
+
} else if (s[0] == 'f') {
|
220
|
+
if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
|
221
|
+
AddMatch(id + 90 * n, l + 4, l, matches);
|
222
|
+
}
|
223
|
+
} else if (s[0] == 'i') {
|
224
|
+
if (s[1] == 'v') {
|
225
|
+
if (s[2] == 'e' && s[3] == ' ') {
|
226
|
+
AddMatch(id + 92 * n, l + 4, l, matches);
|
227
|
+
}
|
228
|
+
} else if (s[1] == 'z') {
|
229
|
+
if (s[2] == 'e' && s[3] == ' ') {
|
230
|
+
AddMatch(id + 100 * n, l + 4, l, matches);
|
231
|
+
}
|
232
|
+
}
|
233
|
+
} else if (s[0] == 'l') {
|
234
|
+
if (s[1] == 'e') {
|
235
|
+
if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
|
236
|
+
AddMatch(id + 93 * n, l + 5, l, matches);
|
237
|
+
}
|
238
|
+
} else if (s[1] == 'y') {
|
239
|
+
if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
|
240
|
+
}
|
241
|
+
} else if (s[0] == 'o') {
|
242
|
+
if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
|
243
|
+
AddMatch(id + 106 * n, l + 4, l, matches);
|
244
|
+
}
|
245
|
+
}
|
246
|
+
} else {
|
247
|
+
// Set t=0 for kUppercaseFirst and t=1 for kUppercaseAll transform.
|
248
|
+
const int t = w.transform - 10;
|
249
|
+
if (!IsMatch(w, data, max_length)) {
|
250
|
+
continue;
|
251
|
+
}
|
252
|
+
// Transform "" + kUppercase{First,All} + ""
|
253
|
+
AddMatch(id + (t ? 44 : 9) * n, l, l, matches);
|
254
|
+
found_match = true;
|
255
|
+
if (l + 1 >= max_length) {
|
256
|
+
continue;
|
257
|
+
}
|
258
|
+
// Transforms "" + kUppercase{First,All} + <suffix>
|
259
|
+
const uint8_t* s = &data[l];
|
260
|
+
if (s[0] == ' ') {
|
261
|
+
AddMatch(id + (t ? 68 : 4) * n, l + 1, l, matches);
|
262
|
+
} else if (s[0] == '"') {
|
263
|
+
AddMatch(id + (t ? 87 : 66) * n, l + 1, l, matches);
|
264
|
+
if (s[1] == '>') {
|
265
|
+
AddMatch(id + (t ? 97 : 69) * n, l + 2, l, matches);
|
266
|
+
}
|
267
|
+
} else if (s[0] == '.') {
|
268
|
+
AddMatch(id + (t ? 101 : 79) * n, l + 1, l, matches);
|
269
|
+
if (s[1] == ' ') {
|
270
|
+
AddMatch(id + (t ? 114 : 88) * n, l + 2, l, matches);
|
271
|
+
}
|
272
|
+
} else if (s[0] == ',') {
|
273
|
+
AddMatch(id + (t ? 112 : 99) * n, l + 1, l, matches);
|
274
|
+
if (s[1] == ' ') {
|
275
|
+
AddMatch(id + (t ? 107 : 58) * n, l + 2, l, matches);
|
276
|
+
}
|
277
|
+
} else if (s[0] == '\'') {
|
278
|
+
AddMatch(id + (t ? 94 : 74) * n, l + 1, l, matches);
|
279
|
+
} else if (s[0] == '(') {
|
280
|
+
AddMatch(id + (t ? 113 : 78) * n, l + 1, l, matches);
|
281
|
+
} else if (s[0] == '=') {
|
282
|
+
if (s[1] == '"') {
|
283
|
+
AddMatch(id + (t ? 105 : 104) * n, l + 2, l, matches);
|
284
|
+
} else if (s[1] == '\'') {
|
285
|
+
AddMatch(id + (t ? 116 : 108) * n, l + 2, l, matches);
|
286
|
+
}
|
287
|
+
}
|
288
|
+
}
|
289
|
+
}
|
290
|
+
}
|
291
|
+
// Transforms with prefixes " " and "."
|
292
|
+
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
|
293
|
+
bool is_space = (data[0] == ' ');
|
294
|
+
key = Hash(&data[1]);
|
295
|
+
bucket = kStaticDictionaryBuckets[key];
|
296
|
+
int num = bucket & 0xff;
|
297
|
+
int offset = bucket >> 8;
|
298
|
+
for (int i = 0; i < num; ++i) {
|
299
|
+
const DictWord w = kStaticDictionaryWords[offset + i];
|
300
|
+
const int l = w.len;
|
301
|
+
const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
|
302
|
+
const int id = w.idx;
|
303
|
+
if (w.transform == 0) {
|
304
|
+
if (!IsMatch(w, &data[1], max_length - 1)) {
|
305
|
+
continue;
|
306
|
+
}
|
307
|
+
// Transforms " " + kIdentity + "" and "." + kIdentity + ""
|
308
|
+
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
|
309
|
+
found_match = true;
|
310
|
+
if (l + 2 >= max_length) {
|
311
|
+
continue;
|
312
|
+
}
|
313
|
+
// Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
|
314
|
+
const uint8_t* s = &data[l + 1];
|
315
|
+
if (s[0] == ' ') {
|
316
|
+
AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
|
317
|
+
} else if (s[0] == '(') {
|
318
|
+
AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
|
319
|
+
} else if (is_space) {
|
320
|
+
if (s[0] == ',') {
|
321
|
+
AddMatch(id + 103 * n, l + 2, l, matches);
|
322
|
+
if (s[1] == ' ') {
|
323
|
+
AddMatch(id + 33 * n, l + 3, l, matches);
|
324
|
+
}
|
325
|
+
} else if (s[0] == '.') {
|
326
|
+
AddMatch(id + 71 * n, l + 2, l, matches);
|
327
|
+
if (s[1] == ' ') {
|
328
|
+
AddMatch(id + 52 * n, l + 3, l, matches);
|
329
|
+
}
|
330
|
+
} else if (s[0] == '=') {
|
331
|
+
if (s[1] == '"') {
|
332
|
+
AddMatch(id + 81 * n, l + 3, l, matches);
|
333
|
+
} else if (s[1] == '\'') {
|
334
|
+
AddMatch(id + 98 * n, l + 3, l, matches);
|
335
|
+
}
|
336
|
+
}
|
337
|
+
}
|
338
|
+
} else if (is_space) {
|
339
|
+
// Set t=0 for kUppercaseFirst and t=1 for kUppercaseAll transform.
|
340
|
+
const int t = w.transform - 10;
|
341
|
+
if (!IsMatch(w, &data[1], max_length - 1)) {
|
342
|
+
continue;
|
343
|
+
}
|
344
|
+
// Transforms " " + kUppercase{First,All} + ""
|
345
|
+
AddMatch(id + (t ? 85 : 30) * n, l + 1, l, matches);
|
346
|
+
found_match = true;
|
347
|
+
if (l + 2 >= max_length) {
|
348
|
+
continue;
|
349
|
+
}
|
350
|
+
// Transforms " " + kUppercase{First,All} + <suffix>
|
351
|
+
const uint8_t* s = &data[l + 1];
|
352
|
+
if (s[0] == ' ') {
|
353
|
+
AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches);
|
354
|
+
} else if (s[0] == ',') {
|
355
|
+
if (t == 0) {
|
356
|
+
AddMatch(id + 109 * n, l + 2, l, matches);
|
357
|
+
}
|
358
|
+
if (s[1] == ' ') {
|
359
|
+
AddMatch(id + (t ? 111 : 65) * n, l + 3, l, matches);
|
360
|
+
}
|
361
|
+
} else if (s[0] == '.') {
|
362
|
+
AddMatch(id + (t ? 115 : 96) * n, l + 2, l, matches);
|
363
|
+
if (s[1] == ' ') {
|
364
|
+
AddMatch(id + (t ? 117 : 91) * n, l + 3, l, matches);
|
365
|
+
}
|
366
|
+
} else if (s[0] == '=') {
|
367
|
+
if (s[1] == '"') {
|
368
|
+
AddMatch(id + (t ? 110 : 118) * n, l + 3, l, matches);
|
369
|
+
} else if (s[1] == '\'') {
|
370
|
+
AddMatch(id + (t ? 119 : 120) * n, l + 3, l, matches);
|
371
|
+
}
|
372
|
+
}
|
373
|
+
}
|
374
|
+
}
|
375
|
+
}
|
376
|
+
if (max_length >= 6) {
|
377
|
+
// Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0"
|
378
|
+
if ((data[1] == ' ' &&
|
379
|
+
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
|
380
|
+
(data[0] == 0xc2 && data[1] == 0xa0)) {
|
381
|
+
key = Hash(&data[2]);
|
382
|
+
bucket = kStaticDictionaryBuckets[key];
|
383
|
+
int num = bucket & 0xff;
|
384
|
+
int offset = bucket >> 8;
|
385
|
+
for (int i = 0; i < num; ++i) {
|
386
|
+
const DictWord w = kStaticDictionaryWords[offset + i];
|
387
|
+
const int l = w.len;
|
388
|
+
const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
|
389
|
+
const int id = w.idx;
|
390
|
+
if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {
|
391
|
+
if (data[0] == 0xc2) {
|
392
|
+
AddMatch(id + 102 * n, l + 2, l, matches);
|
393
|
+
found_match = true;
|
394
|
+
} else if (l + 2 < max_length && data[l + 2] == ' ') {
|
395
|
+
int t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
|
396
|
+
AddMatch(id + t * n, l + 3, l, matches);
|
397
|
+
found_match = true;
|
398
|
+
}
|
399
|
+
}
|
400
|
+
}
|
401
|
+
}
|
402
|
+
}
|
403
|
+
if (max_length >= 9) {
|
404
|
+
// Transforms with prefixes " the " and ".com/"
|
405
|
+
if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
|
406
|
+
data[3] == 'e' && data[4] == ' ') ||
|
407
|
+
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
|
408
|
+
data[3] == 'm' && data[4] == '/')) {
|
409
|
+
key = Hash(&data[5]);
|
410
|
+
bucket = kStaticDictionaryBuckets[key];
|
411
|
+
int num = bucket & 0xff;
|
412
|
+
int offset = bucket >> 8;
|
413
|
+
for (int i = 0; i < num; ++i) {
|
414
|
+
const DictWord w = kStaticDictionaryWords[offset + i];
|
415
|
+
const int l = w.len;
|
416
|
+
const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
|
417
|
+
const int id = w.idx;
|
418
|
+
if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {
|
419
|
+
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
|
420
|
+
found_match = true;
|
421
|
+
if (l + 5 < max_length) {
|
422
|
+
const uint8_t* s = &data[l + 5];
|
423
|
+
if (data[0] == ' ') {
|
424
|
+
if (l + 8 < max_length &&
|
425
|
+
s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
|
426
|
+
AddMatch(id + 62 * n, l + 9, l, matches);
|
427
|
+
if (l + 12 < max_length &&
|
428
|
+
s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
|
429
|
+
AddMatch(id + 73 * n, l + 13, l, matches);
|
430
|
+
}
|
431
|
+
}
|
432
|
+
}
|
433
|
+
}
|
434
|
+
}
|
435
|
+
}
|
436
|
+
}
|
437
|
+
}
|
438
|
+
return found_match;
|
439
|
+
}
|
440
|
+
|
441
|
+
} // namespace brotli
|