extbrotli 0.0.1.PROTOTYPE

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +28 -0
  3. data/README.md +67 -0
  4. data/Rakefile +158 -0
  5. data/contrib/brotli/LICENSE +202 -0
  6. data/contrib/brotli/README.md +18 -0
  7. data/contrib/brotli/dec/bit_reader.c +55 -0
  8. data/contrib/brotli/dec/bit_reader.h +256 -0
  9. data/contrib/brotli/dec/context.h +260 -0
  10. data/contrib/brotli/dec/decode.c +1573 -0
  11. data/contrib/brotli/dec/decode.h +160 -0
  12. data/contrib/brotli/dec/dictionary.h +9494 -0
  13. data/contrib/brotli/dec/huffman.c +325 -0
  14. data/contrib/brotli/dec/huffman.h +77 -0
  15. data/contrib/brotli/dec/port.h +148 -0
  16. data/contrib/brotli/dec/prefix.h +756 -0
  17. data/contrib/brotli/dec/state.c +149 -0
  18. data/contrib/brotli/dec/state.h +185 -0
  19. data/contrib/brotli/dec/streams.c +99 -0
  20. data/contrib/brotli/dec/streams.h +100 -0
  21. data/contrib/brotli/dec/transform.h +315 -0
  22. data/contrib/brotli/dec/types.h +36 -0
  23. data/contrib/brotli/enc/backward_references.cc +769 -0
  24. data/contrib/brotli/enc/backward_references.h +50 -0
  25. data/contrib/brotli/enc/bit_cost.h +147 -0
  26. data/contrib/brotli/enc/block_splitter.cc +418 -0
  27. data/contrib/brotli/enc/block_splitter.h +78 -0
  28. data/contrib/brotli/enc/brotli_bit_stream.cc +884 -0
  29. data/contrib/brotli/enc/brotli_bit_stream.h +149 -0
  30. data/contrib/brotli/enc/cluster.h +290 -0
  31. data/contrib/brotli/enc/command.h +140 -0
  32. data/contrib/brotli/enc/context.h +185 -0
  33. data/contrib/brotli/enc/dictionary.h +9485 -0
  34. data/contrib/brotli/enc/dictionary_hash.h +4125 -0
  35. data/contrib/brotli/enc/encode.cc +715 -0
  36. data/contrib/brotli/enc/encode.h +196 -0
  37. data/contrib/brotli/enc/encode_parallel.cc +354 -0
  38. data/contrib/brotli/enc/encode_parallel.h +37 -0
  39. data/contrib/brotli/enc/entropy_encode.cc +492 -0
  40. data/contrib/brotli/enc/entropy_encode.h +88 -0
  41. data/contrib/brotli/enc/fast_log.h +179 -0
  42. data/contrib/brotli/enc/find_match_length.h +87 -0
  43. data/contrib/brotli/enc/hash.h +686 -0
  44. data/contrib/brotli/enc/histogram.cc +76 -0
  45. data/contrib/brotli/enc/histogram.h +100 -0
  46. data/contrib/brotli/enc/literal_cost.cc +172 -0
  47. data/contrib/brotli/enc/literal_cost.h +38 -0
  48. data/contrib/brotli/enc/metablock.cc +544 -0
  49. data/contrib/brotli/enc/metablock.h +88 -0
  50. data/contrib/brotli/enc/port.h +151 -0
  51. data/contrib/brotli/enc/prefix.h +85 -0
  52. data/contrib/brotli/enc/ringbuffer.h +108 -0
  53. data/contrib/brotli/enc/static_dict.cc +441 -0
  54. data/contrib/brotli/enc/static_dict.h +40 -0
  55. data/contrib/brotli/enc/static_dict_lut.h +12063 -0
  56. data/contrib/brotli/enc/streams.cc +127 -0
  57. data/contrib/brotli/enc/streams.h +129 -0
  58. data/contrib/brotli/enc/transform.h +250 -0
  59. data/contrib/brotli/enc/write_bits.h +91 -0
  60. data/ext/extbrotli.cc +24 -0
  61. data/ext/extbrotli.h +73 -0
  62. data/ext/extconf.rb +35 -0
  63. data/ext/lldecoder.c +220 -0
  64. data/ext/llencoder.cc +433 -0
  65. data/gemstub.rb +21 -0
  66. data/lib/extbrotli.rb +243 -0
  67. data/lib/extbrotli/version.rb +3 -0
  68. metadata +140 -0
@@ -0,0 +1,88 @@
1
+ // Copyright 2015 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Algorithms for distributing the literals and commands of a metablock between
16
+ // block types and contexts.
17
+
18
+ #ifndef BROTLI_ENC_METABLOCK_H_
19
+ #define BROTLI_ENC_METABLOCK_H_
20
+
21
+ #include <vector>
22
+
23
+ #include "./command.h"
24
+ #include "./histogram.h"
25
+
26
+ namespace brotli {
27
+
28
// Describes how one symbol stream is cut into typed blocks.
// A default-constructed split has zero block types and two empty vectors.
// NOTE(review): types[i] / lengths[i] presumably give the type and length of
// the i-th block; confirm against the block splitter implementation.
struct BlockSplit {
  BlockSplit() : num_types(0), types(), lengths() {}

  // Number of distinct block types; starts at 0.
  int num_types;
  // One entry per block.
  std::vector<int> types;
  // One entry per block.
  std::vector<int> lengths;
};
35
+
36
+ struct MetaBlockSplit {  // Plain aggregate: three block splits, two context maps and three histogram vectors.
37
+ BlockSplit literal_split;  // split for the literal stream (per the field name)
38
+ BlockSplit command_split;  // split for the command stream (per the field name)
39
+ BlockSplit distance_split;  // split for the distance stream (per the field name)
40
+ std::vector<int> literal_context_map;  // NOTE(review): indexing scheme is not visible in this header
41
+ std::vector<int> distance_context_map;  // NOTE(review): indexing scheme is not visible in this header
42
+ std::vector<HistogramLiteral> literal_histograms;  // histogram types come from ./histogram.h
43
+ std::vector<HistogramCommand> command_histograms;
44
+ std::vector<HistogramDistance> distance_histograms;
45
+ };
46
+
47
+ // Uses the slow shortest-path block splitter and does context clustering. Declaration only; the definition is not in this header.
48
+ void BuildMetaBlock(const uint8_t* ringbuffer,  // read-only input bytes
49
+ const size_t pos,  // presumably the logical start position inside ringbuffer - TODO confirm
50
+ const size_t mask,  // presumably the index mask applied to positions - TODO confirm
51
+ uint8_t prev_byte,
52
+ uint8_t prev_byte2,
53
+ const Command* cmds,  // read-only array of num_commands commands
54
+ size_t num_commands,
55
+ int literal_context_mode,
56
+ MetaBlockSplit* mb);  // non-const pointer: presumably receives the result
57
+
58
+ // Uses a fast greedy block splitter that tries to merge current block with the
59
+ // last or the second last block and does not do any context modeling. Declaration only.
60
+ void BuildMetaBlockGreedy(const uint8_t* ringbuffer,  // read-only input bytes
61
+ size_t pos,  // presumably the logical start position inside ringbuffer - TODO confirm
62
+ size_t mask,  // presumably the index mask applied to positions - TODO confirm
63
+ const Command *commands,  // read-only array of n_commands commands
64
+ size_t n_commands,
65
+ MetaBlockSplit* mb);  // non-const pointer: presumably receives the result
66
+
67
+ // Uses a fast greedy block splitter that tries to merge current block with the
68
+ // last or the second last block and uses a static context clustering which
69
+ // is the same for all block types. Declaration only.
70
+ void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,  // read-only input bytes
71
+ size_t pos,  // presumably the logical start position inside ringbuffer - TODO confirm
72
+ size_t mask,  // presumably the index mask applied to positions - TODO confirm
73
+ uint8_t prev_byte,
74
+ uint8_t prev_byte2,
75
+ int literal_context_mode,
76
+ int num_contexts,
77
+ const int* static_context_map,  // read-only; the "static context clustering" named in the comment above
78
+ const Command *commands,  // read-only array of n_commands commands
79
+ size_t n_commands,
80
+ MetaBlockSplit* mb);  // non-const pointer: presumably receives the result
81
+ // Declaration only; the definition is not part of this header. NOTE(review): the exact effect on *mb is not visible here.
82
+ void OptimizeHistograms(int num_direct_distance_codes,
83
+ int distance_postfix_bits,
84
+ MetaBlockSplit* mb);  // non-const pointer, so the histograms may be modified in place
85
+
86
+ } // namespace brotli
87
+
88
+ #endif // BROTLI_ENC_METABLOCK_H_
@@ -0,0 +1,151 @@
1
+ // Copyright 2013 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Macros for endianness, branch prediction and unaligned loads and stores.
16
+
17
+ #ifndef BROTLI_ENC_PORT_H_
18
+ #define BROTLI_ENC_PORT_H_
19
+
20
+ #include <string.h>
21
+
22
// Pull in the platform's byte-order header where one is known.
#if defined(OS_LINUX) || defined(OS_CYGWIN)
#include <endian.h>
#elif defined(OS_FREEBSD) || defined(OS_MACOSX)
#include <machine/endian.h>
#if defined(OS_MACOSX)
/* Mirror the Linux spelling of the byte-order macros. */
#define __BYTE_ORDER BYTE_ORDER
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#define __BIG_ENDIAN BIG_ENDIAN
#endif  // OS_MACOSX
#endif

// Derive IS_LITTLE_ENDIAN / IS_BIG_ENDIAN. Prefer __BYTE_ORDER when a header
// above (or anything included earlier) defined it; otherwise fall back to the
// compiler-provided __LITTLE_ENDIAN__ / __BIG_ENDIAN__ macros.
#if defined(__BYTE_ORDER)

#if __BYTE_ORDER == __LITTLE_ENDIAN
#define IS_LITTLE_ENDIAN
#endif

#if __BYTE_ORDER == __BIG_ENDIAN
#define IS_BIG_ENDIAN
#endif

#elif defined(__LITTLE_ENDIAN__)
#define IS_LITTLE_ENDIAN
#elif defined(__BIG_ENDIAN__)
#define IS_BIG_ENDIAN
#endif  // __BYTE_ORDER

// Windows on x64 is always treated as little-endian.
#if defined(_M_X64) && (defined(_WIN32) || defined(_WIN64))
#define IS_LITTLE_ENDIAN
#endif
59
+
60
/* Compatibility with non-clang compilers. */
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

// Branch-prediction hints.
//   PREDICT_TRUE(x)  - the condition x is expected to hold.
//   PREDICT_FALSE(x) - the condition x is expected not to hold.
// Both are meant to be used as the condition of an `if`. With GCC-compatible
// compilers the argument is first normalised with !!(x), because
// __builtin_expect takes a `long`: without the normalisation a pointer
// argument does not compile in C++ and a value wider than `long` could be
// truncated to zero. On other compilers the macros simply yield (x).
#if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ > 95) || \
    (defined(__llvm__) && __has_builtin(__builtin_expect))
#define PREDICT_FALSE(x) (__builtin_expect(!!(x), 0))
#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
#define PREDICT_FALSE(x) (x)
#define PREDICT_TRUE(x) (x)
#endif
73
+
74
+ // Portable handling of unaligned loads, stores, and copies.
75
+ // On some platforms, like ARM, the copy functions can be more efficient
76
+ // than a load and a store.
77
+
78
// Step 1: decide which access widths may be done by dereferencing a
// (possibly unaligned) pointer directly. The two helper flags are private to
// this section and are undefined again at its end.
#if defined(ARCH_PIII) || defined(ARCH_ATHLON) || \
    defined(ARCH_K8) || defined(_ARCH_PPC)

// x86 and x86-64 can perform unaligned loads/stores directly;
// modern PowerPC hardware can also do unaligned integer loads and stores;
// but note: the FPU still sends unaligned loads and stores to a trap handler!
#define BROTLI_PORT_DIRECT_UNALIGNED_32
#define BROTLI_PORT_DIRECT_UNALIGNED_64

#elif defined(__arm__) && \
    !(defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
      defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__) || \
      defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
      defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
      defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__))

// ARMv7 and newer support native unaligned accesses, but only of 16-bit
// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
// do an unaligned read and rotate the words around a bit, or do the reads very
// slowly (trip through kernel mode).
#define BROTLI_PORT_DIRECT_UNALIGNED_32

#endif

// Step 2: 32-bit accessors. Direct dereference where allowed, otherwise a
// memcpy through a local so no unaligned access is ever issued.
#if defined(BROTLI_PORT_DIRECT_UNALIGNED_32)

#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
  (*reinterpret_cast<uint32_t *>(_p) = (_val))

#else

inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
  uint32_t value;
  memcpy(&value, p, sizeof value);
  return value;
}

inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
  memcpy(p, &v, sizeof v);
}

#endif

// Step 3: 64-bit accessors, same scheme.
#if defined(BROTLI_PORT_DIRECT_UNALIGNED_64)

#define BROTLI_UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p))
#define BROTLI_UNALIGNED_STORE64(_p, _val) \
  (*reinterpret_cast<uint64_t *>(_p) = (_val))

#else

inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
  uint64_t value;
  memcpy(&value, p, sizeof value);
  return value;
}

inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
  memcpy(p, &v, sizeof v);
}

#endif

#undef BROTLI_PORT_DIRECT_UNALIGNED_32
#undef BROTLI_PORT_DIRECT_UNALIGNED_64
150
+
151
+ #endif // BROTLI_ENC_PORT_H_
@@ -0,0 +1,85 @@
1
+ // Copyright 2013 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Functions for encoding of integers into prefix codes, the amount of extra
16
+ // bits, and the actual values of the extra bits.
17
+
18
+ #ifndef BROTLI_ENC_PREFIX_H_
19
+ #define BROTLI_ENC_PREFIX_H_
20
+
21
+ #include <stdint.h>
22
+ #include "./fast_log.h"
23
+
24
+ namespace brotli {
25
+
26
// Sizes of the prefix-code alphabets used by the encoder.
// NOTE(review): the meaning of each alphabet is inferred from its name only.
static const int kNumInsertLenPrefixes = 24;
static const int kNumCopyLenPrefixes = 24;
static const int kNumCommandPrefixes = 704;
static const int kNumBlockLenPrefixes = 26;
static const int kNumDistanceShortCodes = 16;
static const int kNumDistancePrefixes = 520;

// One prefix code covers the half-open value range
// [offset, offset + 2^nbits); nbits is the number of extra bits that select
// a value inside that range.
struct PrefixCodeRange {
  int offset;
  int nbits;
};

// Block-length prefix codes, ordered by increasing offset.
static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
  {    1,  2}, {    5,  2}, {    9,  2}, {   13,  2},
  {   17,  3}, {   25,  3}, {   33,  3}, {   41,  3},
  {   49,  4}, {   65,  4}, {   81,  4}, {   97,  4},
  {  113,  5}, {  145,  5}, {  177,  5}, {  209,  5},
  {  241,  6}, {  305,  6}, {  369,  7}, {  497,  8},
  {  753,  9}, { 1265, 10}, { 2289, 11}, { 4337, 12},
  { 8433, 13}, {16625, 24}
};

// Maps a block length to its prefix code.
//   len     - block length to encode.
//   code    - receives the index of the last table entry whose offset is
//             <= len (entry 0 if len is below every offset).
//   n_extra - receives the number of extra bits of that entry.
//   extra   - receives len minus the entry's offset.
inline void GetBlockLengthPrefixCode(int len,
                                     int* code, int* n_extra, int* extra) {
  const int last = kNumBlockLenPrefixes - 1;
  int index = 0;
  // The table is sorted, so step forward while the next entry still starts
  // at or below len.
  for (; index < last; ++index) {
    if (len < kBlockLengthPrefixCode[index + 1].offset) break;
  }
  const PrefixCodeRange& range = kBlockLengthPrefixCode[index];
  *code = index;
  *n_extra = range.nbits;
  *extra = len - range.offset;
}
59
+
60
+ inline void PrefixEncodeCopyDistance(int distance_code,
61
+ int num_direct_codes,
62
+ int postfix_bits,
63
+ uint16_t* code,
64
+ uint32_t* extra_bits) {
65
+ if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
66
+ *code = distance_code;
67
+ *extra_bits = 0;
68
+ return;
69
+ }
70
+ distance_code -= kNumDistanceShortCodes + num_direct_codes;
71
+ distance_code += (1 << (postfix_bits + 2));
72
+ int bucket = Log2Floor(distance_code) - 1;
73
+ int postfix_mask = (1 << postfix_bits) - 1;
74
+ int postfix = distance_code & postfix_mask;
75
+ int prefix = (distance_code >> bucket) & 1;
76
+ int offset = (2 + prefix) << bucket;
77
+ int nbits = bucket - postfix_bits;
78
+ *code = kNumDistanceShortCodes + num_direct_codes +
79
+ ((2 * (nbits - 1) + prefix) << postfix_bits) + postfix;
80
+ *extra_bits = (nbits << 24) | ((distance_code - offset) >> postfix_bits);
81
+ }
82
+
83
+ } // namespace brotli
84
+
85
+ #endif // BROTLI_ENC_PREFIX_H_
@@ -0,0 +1,108 @@
1
+ // Copyright 2013 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Sliding window over the input data.
16
+
17
+ #ifndef BROTLI_ENC_RINGBUFFER_H_
18
+ #define BROTLI_ENC_RINGBUFFER_H_
19
+
20
+ #include <stddef.h>
21
+ #include <stdint.h>
22
+
23
+ #include "./port.h"
24
+
25
+ namespace brotli {
26
+
27
+ // A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
28
+ // data in a circular manner: writing a byte writes it to
29
+ // `position() % (1 << window_bits)'. For convenience, the RingBuffer array
30
+ // contains another copy of the first `1 << tail_bits' bytes:
31
+ // buffer_[i] == buffer_[i + (1 << window_bits)] if i < (1 << tail_bits).
32
+ class RingBuffer {
33
+ public:
34
+ RingBuffer(int window_bits, int tail_bits)
35
+ : window_bits_(window_bits),
36
+ mask_((1 << window_bits) - 1),
37
+ tail_size_(1 << tail_bits),
38
+ pos_(0) {
39
+ static const int kSlackForEightByteHashingEverywhere = 7;
40
+ const int buflen = (1 << window_bits_) + tail_size_;
41
+ buffer_ = new uint8_t[buflen + kSlackForEightByteHashingEverywhere];
42
+ for (int i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
43
+ buffer_[buflen + i] = 0;
44
+ }
45
+ }
46
+ ~RingBuffer() {
47
+ delete [] buffer_;
48
+ }
49
+
50
+ // Push bytes into the ring buffer.
51
+ void Write(const uint8_t *bytes, size_t n) {
52
+ const size_t masked_pos = pos_ & mask_;
53
+ // The length of the writes is limited so that we do not need to worry
54
+ // about a write
55
+ WriteTail(bytes, n);
56
+ if (PREDICT_TRUE(masked_pos + n <= (1 << window_bits_))) {
57
+ // A single write fits.
58
+ memcpy(&buffer_[masked_pos], bytes, n);
59
+ } else {
60
+ // Split into two writes.
61
+ // Copy into the end of the buffer, including the tail buffer.
62
+ memcpy(&buffer_[masked_pos], bytes,
63
+ std::min(n, ((1 << window_bits_) + tail_size_) - masked_pos));
64
+ // Copy into the begining of the buffer
65
+ memcpy(&buffer_[0], bytes + ((1 << window_bits_) - masked_pos),
66
+ n - ((1 << window_bits_) - masked_pos));
67
+ }
68
+ pos_ += n;
69
+ }
70
+
71
+ void Reset() {
72
+ pos_ = 0;
73
+ }
74
+
75
+ // Logical cursor position in the ring buffer.
76
+ size_t position() const { return pos_; }
77
+
78
+ // Bit mask for getting the physical position for a logical position.
79
+ size_t mask() const { return mask_; }
80
+
81
+ uint8_t *start() { return &buffer_[0]; }
82
+ const uint8_t *start() const { return &buffer_[0]; }
83
+
84
+ private:
85
+ void WriteTail(const uint8_t *bytes, size_t n) {
86
+ const size_t masked_pos = pos_ & mask_;
87
+ if (PREDICT_FALSE(masked_pos < tail_size_)) {
88
+ // Just fill the tail buffer with the beginning data.
89
+ const size_t p = (1 << window_bits_) + masked_pos;
90
+ memcpy(&buffer_[p], bytes, std::min(n, tail_size_ - masked_pos));
91
+ }
92
+ }
93
+
94
+ // Size of the ringbuffer is (1 << window_bits) + tail_size_.
95
+ const int window_bits_;
96
+ const size_t mask_;
97
+ const size_t tail_size_;
98
+
99
+ // Position to write in the ring buffer.
100
+ size_t pos_;
101
+ // The actual ring buffer containing the data and the copy of the beginning
102
+ // as a tail.
103
+ uint8_t *buffer_;
104
+ };
105
+
106
+ } // namespace brotli
107
+
108
+ #endif // BROTLI_ENC_RINGBUFFER_H_
@@ -0,0 +1,441 @@
1
+ #include "./static_dict.h"
2
+
3
+ #include <algorithm>
4
+
5
+ #include "./dictionary.h"
6
+ #include "./find_match_length.h"
7
+ #include "./static_dict_lut.h"
8
+ #include "./transform.h"
9
+
10
+ namespace brotli {
11
+
12
+ inline uint32_t Hash(const uint8_t *data) {
13
+ uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
14
+ // The higher bits contain more mixture from the multiplication,
15
+ // so we take our results from there.
16
+ return h >> (32 - kDictNumBits);
17
+ }
18
+
19
// Records a candidate for matches of length len. The candidate is packed as
// (distance << 5) + len_code and replaces matches[len] only when it is
// smaller than the value already stored there.
inline void AddMatch(int distance, int len, int len_code, int* matches) {
  const int candidate = (distance << 5) + len_code;
  int& slot = matches[len];
  if (candidate < slot) {
    slot = candidate;
  }
}
22
+
23
+ inline int DictMatchLength(const uint8_t* data, int id, int len, int maxlen) {
24
+ const int offset = kBrotliDictionaryOffsetsByLength[len] + len * id;
25
+ return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data,
26
+ std::min(len, maxlen));
27
+ }
28
+
29
+ inline bool IsMatch(DictWord w, const uint8_t* data, int max_length) {
30
+ if (w.len > max_length) return false;
31
+ const int offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx;
32
+ const uint8_t* dict = &kBrotliDictionary[offset];
33
+ if (w.transform == 0) {
34
+ // Match against base dictionary word.
35
+ return FindMatchLengthWithLimit(dict, data, w.len) == w.len;
36
+ } else if (w.transform == 10) {
37
+ // Match against uppercase first transform.
38
+ // Note that there are only ASCII uppercase words in the lookup table.
39
+ return (dict[0] >= 'a' && dict[0] <= 'z' &&
40
+ (dict[0] ^ 32) == data[0] &&
41
+ FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1) ==
42
+ w.len - 1);
43
+ } else {
44
+ // Match against uppercase all transform.
45
+ // Note that there are only ASCII uppercase words in the lookup table.
46
+ for (int i = 0; i < w.len; ++i) {
47
+ if (dict[i] >= 'a' && dict[i] <= 'z') {
48
+ if ((dict[i] ^ 32) != data[i]) return false;
49
+ } else {
50
+ if (dict[i] != data[i]) return false;
51
+ }
52
+ }
53
+ return true;
54
+ }
55
+ }
56
+ // Records in matches[] every static-dictionary word, plain or transformed, that matches the bytes at data. Probes the hash buckets for data, data + 1, data + 2 and data + 5 (the latter three cover transforms with a prefix). Returns true if at least one match was recorded.
57
+ bool FindAllStaticDictionaryMatches(const uint8_t* data,
58
+ int min_length,  // lower bound used only for the kOmitLastN lengths below
59
+ int max_length,  // bounds every read of data[]: indices stay below max_length, except Hash(data), which loads data[0..3] unchecked
60
+ int* matches) {  // matches[len] keeps the smallest (distance << 5) + len_code recorded for that length (see AddMatch)
61
+ bool found_match = false;
62
+ uint32_t key = Hash(data);
63
+ uint32_t bucket = kStaticDictionaryBuckets[key];
64
+ if (bucket != 0) {  // 0 is treated as an empty bucket
65
+ int num = bucket & 0xff;  // low 8 bits: number of candidate words
66
+ int offset = bucket >> 8;  // remaining bits: index of the first candidate in kStaticDictionaryWords
67
+ for (int i = 0; i < num; ++i) {
68
+ const DictWord w = kStaticDictionaryWords[offset + i];
69
+ const int l = w.len;
70
+ const int n = 1 << kBrotliDictionarySizeBitsByLength[l];  // transform k of word id is reported as id + k * n in the AddMatch calls below
71
+ const int id = w.idx;
72
+ if (w.transform == 0) {
73
+ const int matchlen = DictMatchLength(data, id, l, max_length);
74
+ // Transform "" + kIdentity + ""
75
+ if (matchlen == l) {
76
+ AddMatch(id, l, l, matches);
77
+ found_match = true;
78
+ }
79
+ // Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing "
80
+ if (matchlen >= l - 1) {
81
+ AddMatch(id + 12 * n, l - 1, l, matches);
82
+ if (l + 2 < max_length &&
83
+ data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
84
+ data[l + 2] == ' ') {
85
+ AddMatch(id + 49 * n, l + 3, l, matches);
86
+ }
87
+ found_match = true;
88
+ }
89
+ // Transform "" + kOmitLastN + "" (N = 2 .. 9)
90
+ int minlen = std::max<int>(min_length, l - 9);
91
+ int maxlen = std::min<int>(matchlen, l - 2);
92
+ for (int len = minlen; len <= maxlen; ++len) {
93
+ AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);
94
+ found_match = true;
95
+ }
96
+ if (matchlen < l || l + 6 >= max_length) {  // the suffix tests below need a full word match and read up to s[6] == data[l + 6]
97
+ continue;
98
+ }
99
+ const uint8_t* s = &data[l];
100
+ // Transforms "" + kIdentity + <suffix>
101
+ if (s[0] == ' ') {
102
+ AddMatch(id + n, l + 1, l, matches);
103
+ if (s[1] == 'a') {
104
+ if (s[2] == ' ') {
105
+ AddMatch(id + 28 * n, l + 3, l, matches);
106
+ } else if (s[2] == 's') {
107
+ if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
108
+ } else if (s[2] == 't') {
109
+ if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
110
+ } else if (s[2] == 'n') {
111
+ if (s[3] == 'd' && s[4] == ' ') {
112
+ AddMatch(id + 10 * n, l + 5, l, matches);
113
+ }
114
+ }
115
+ } else if (s[1] == 'b') {
116
+ if (s[2] == 'y' && s[3] == ' ') {
117
+ AddMatch(id + 38 * n, l + 4, l, matches);
118
+ }
119
+ } else if (s[1] == 'i') {
120
+ if (s[2] == 'n') {
121
+ if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
122
+ } else if (s[2] == 's') {
123
+ if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
124
+ }
125
+ } else if (s[1] == 'f') {
126
+ if (s[2] == 'o') {
127
+ if (s[3] == 'r' && s[4] == ' ') {
128
+ AddMatch(id + 25 * n, l + 5, l, matches);
129
+ }
130
+ } else if (s[2] == 'r') {
131
+ if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
132
+ AddMatch(id + 37 * n, l + 6, l, matches);
133
+ }
134
+ }
135
+ } else if (s[1] == 'o') {
136
+ if (s[2] == 'f') {
137
+ if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
138
+ } else if (s[2] == 'n') {
139
+ if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
140
+ }
141
+ } else if (s[1] == 'n') {
142
+ if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
143
+ AddMatch(id + 80 * n, l + 5, l, matches);
144
+ }
145
+ } else if (s[1] == 't') {
146
+ if (s[2] == 'h') {
147
+ if (s[3] == 'e') {
148
+ if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
149
+ } else if (s[3] == 'a') {
150
+ if (s[4] == 't' && s[5] == ' ') {
151
+ AddMatch(id + 29 * n, l + 6, l, matches);
152
+ }
153
+ }
154
+ } else if (s[2] == 'o') {
155
+ if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
156
+ }
157
+ } else if (s[1] == 'w') {
158
+ if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
159
+ AddMatch(id + 35 * n, l + 6, l, matches);
160
+ }
161
+ }
162
+ } else if (s[0] == '"') {
163
+ AddMatch(id + 19 * n, l + 1, l, matches);
164
+ if (s[1] == '>') {
165
+ AddMatch(id + 21 * n, l + 2, l, matches);
166
+ }
167
+ } else if (s[0] == '.') {
168
+ AddMatch(id + 20 * n, l + 1, l, matches);
169
+ if (s[1] == ' ') {
170
+ AddMatch(id + 31 * n, l + 2, l, matches);
171
+ if (s[2] == 'T' && s[3] == 'h') {
172
+ if (s[4] == 'e') {
173
+ if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
174
+ } else if (s[4] == 'i') {
175
+ if (s[5] == 's' && s[6] == ' ') {
176
+ AddMatch(id + 75 * n, l + 7, l, matches);
177
+ }
178
+ }
179
+ }
180
+ }
181
+ } else if (s[0] == ',') {
182
+ AddMatch(id + 76 * n, l + 1, l, matches);
183
+ if (s[1] == ' ') {
184
+ AddMatch(id + 14 * n, l + 2, l, matches);
185
+ }
186
+ } else if (s[0] == '\n') {
187
+ AddMatch(id + 22 * n, l + 1, l, matches);
188
+ if (s[1] == '\t') {
189
+ AddMatch(id + 50 * n, l + 2, l, matches);
190
+ }
191
+ } else if (s[0] == ']') {
192
+ AddMatch(id + 24 * n, l + 1, l, matches);
193
+ } else if (s[0] == '\'') {
194
+ AddMatch(id + 36 * n, l + 1, l, matches);
195
+ } else if (s[0] == ':') {
196
+ AddMatch(id + 51 * n, l + 1, l, matches);
197
+ } else if (s[0] == '(') {
198
+ AddMatch(id + 57 * n, l + 1, l, matches);
199
+ } else if (s[0] == '=') {
200
+ if (s[1] == '"') {
201
+ AddMatch(id + 70 * n, l + 2, l, matches);
202
+ } else if (s[1] == '\'') {
203
+ AddMatch(id + 86 * n, l + 2, l, matches);
204
+ }
205
+ } else if (s[0] == 'a') {
206
+ if (s[1] == 'l' && s[2] == ' ') {
207
+ AddMatch(id + 84 * n, l + 3, l, matches);
208
+ }
209
+ } else if (s[0] == 'e') {
210
+ if (s[1] == 'd') {
211
+ if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
212
+ } else if (s[1] == 'r') {
213
+ if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
214
+ } else if (s[1] == 's') {
215
+ if (s[2] == 't' && s[3] == ' ') {
216
+ AddMatch(id + 95 * n, l + 4, l, matches);
217
+ }
218
+ }
219
+ } else if (s[0] == 'f') {
220
+ if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
221
+ AddMatch(id + 90 * n, l + 4, l, matches);
222
+ }
223
+ } else if (s[0] == 'i') {
224
+ if (s[1] == 'v') {
225
+ if (s[2] == 'e' && s[3] == ' ') {
226
+ AddMatch(id + 92 * n, l + 4, l, matches);
227
+ }
228
+ } else if (s[1] == 'z') {
229
+ if (s[2] == 'e' && s[3] == ' ') {
230
+ AddMatch(id + 100 * n, l + 4, l, matches);
231
+ }
232
+ }
233
+ } else if (s[0] == 'l') {
234
+ if (s[1] == 'e') {
235
+ if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
236
+ AddMatch(id + 93 * n, l + 5, l, matches);
237
+ }
238
+ } else if (s[1] == 'y') {
239
+ if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
240
+ }
241
+ } else if (s[0] == 'o') {
242
+ if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
243
+ AddMatch(id + 106 * n, l + 4, l, matches);
244
+ }
245
+ }
246
+ } else {
247
+ // Set t=0 for kUppercaseFirst and t=1 for kUppercaseAll transform.
248
+ const int t = w.transform - 10;
249
+ if (!IsMatch(w, data, max_length)) {
250
+ continue;
251
+ }
252
+ // Transform "" + kUppercase{First,All} + ""
253
+ AddMatch(id + (t ? 44 : 9) * n, l, l, matches);
254
+ found_match = true;
255
+ if (l + 1 >= max_length) {  // the suffix tests below read s[0] == data[l] and s[1] == data[l + 1]
256
+ continue;
257
+ }
258
+ // Transforms "" + kUppercase{First,All} + <suffix>
259
+ const uint8_t* s = &data[l];
260
+ if (s[0] == ' ') {
261
+ AddMatch(id + (t ? 68 : 4) * n, l + 1, l, matches);
262
+ } else if (s[0] == '"') {
263
+ AddMatch(id + (t ? 87 : 66) * n, l + 1, l, matches);
264
+ if (s[1] == '>') {
265
+ AddMatch(id + (t ? 97 : 69) * n, l + 2, l, matches);
266
+ }
267
+ } else if (s[0] == '.') {
268
+ AddMatch(id + (t ? 101 : 79) * n, l + 1, l, matches);
269
+ if (s[1] == ' ') {
270
+ AddMatch(id + (t ? 114 : 88) * n, l + 2, l, matches);
271
+ }
272
+ } else if (s[0] == ',') {
273
+ AddMatch(id + (t ? 112 : 99) * n, l + 1, l, matches);
274
+ if (s[1] == ' ') {
275
+ AddMatch(id + (t ? 107 : 58) * n, l + 2, l, matches);
276
+ }
277
+ } else if (s[0] == '\'') {
278
+ AddMatch(id + (t ? 94 : 74) * n, l + 1, l, matches);
279
+ } else if (s[0] == '(') {
280
+ AddMatch(id + (t ? 113 : 78) * n, l + 1, l, matches);
281
+ } else if (s[0] == '=') {
282
+ if (s[1] == '"') {
283
+ AddMatch(id + (t ? 105 : 104) * n, l + 2, l, matches);
284
+ } else if (s[1] == '\'') {
285
+ AddMatch(id + (t ? 116 : 108) * n, l + 2, l, matches);
286
+ }
287
+ }
288
+ }
289
+ }
290
+ }
291
+ // Transforms with prefixes " " and "."
292
+ if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {  // Hash() below loads 4 bytes starting at data[1]
293
+ bool is_space = (data[0] == ' ');
294
+ key = Hash(&data[1]);
295
+ bucket = kStaticDictionaryBuckets[key];  // an empty bucket (0) gives num == 0, so the loop below does nothing
296
+ int num = bucket & 0xff;
297
+ int offset = bucket >> 8;
298
+ for (int i = 0; i < num; ++i) {
299
+ const DictWord w = kStaticDictionaryWords[offset + i];
300
+ const int l = w.len;
301
+ const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
302
+ const int id = w.idx;
303
+ if (w.transform == 0) {
304
+ if (!IsMatch(w, &data[1], max_length - 1)) {
305
+ continue;
306
+ }
307
+ // Transforms " " + kIdentity + "" and "." + kIdentity + ""
308
+ AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
309
+ found_match = true;
310
+ if (l + 2 >= max_length) {  // the suffix tests below read data[l + 1] and data[l + 2]
311
+ continue;
312
+ }
313
+ // Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
314
+ const uint8_t* s = &data[l + 1];
315
+ if (s[0] == ' ') {
316
+ AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
317
+ } else if (s[0] == '(') {
318
+ AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
319
+ } else if (is_space) {
320
+ if (s[0] == ',') {
321
+ AddMatch(id + 103 * n, l + 2, l, matches);
322
+ if (s[1] == ' ') {
323
+ AddMatch(id + 33 * n, l + 3, l, matches);
324
+ }
325
+ } else if (s[0] == '.') {
326
+ AddMatch(id + 71 * n, l + 2, l, matches);
327
+ if (s[1] == ' ') {
328
+ AddMatch(id + 52 * n, l + 3, l, matches);
329
+ }
330
+ } else if (s[0] == '=') {
331
+ if (s[1] == '"') {
332
+ AddMatch(id + 81 * n, l + 3, l, matches);
333
+ } else if (s[1] == '\'') {
334
+ AddMatch(id + 98 * n, l + 3, l, matches);
335
+ }
336
+ }
337
+ }
338
+ } else if (is_space) {
339
+ // Set t=0 for kUppercaseFirst and t=1 for kUppercaseAll transform.
340
+ const int t = w.transform - 10;
341
+ if (!IsMatch(w, &data[1], max_length - 1)) {
342
+ continue;
343
+ }
344
+ // Transforms " " + kUppercase{First,All} + ""
345
+ AddMatch(id + (t ? 85 : 30) * n, l + 1, l, matches);
346
+ found_match = true;
347
+ if (l + 2 >= max_length) {  // the suffix tests below read data[l + 1] and data[l + 2]
348
+ continue;
349
+ }
350
+ // Transforms " " + kUppercase{First,All} + <suffix>
351
+ const uint8_t* s = &data[l + 1];
352
+ if (s[0] == ' ') {
353
+ AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches);
354
+ } else if (s[0] == ',') {
355
+ if (t == 0) {
356
+ AddMatch(id + 109 * n, l + 2, l, matches);
357
+ }
358
+ if (s[1] == ' ') {
359
+ AddMatch(id + (t ? 111 : 65) * n, l + 3, l, matches);
360
+ }
361
+ } else if (s[0] == '.') {
362
+ AddMatch(id + (t ? 115 : 96) * n, l + 2, l, matches);
363
+ if (s[1] == ' ') {
364
+ AddMatch(id + (t ? 117 : 91) * n, l + 3, l, matches);
365
+ }
366
+ } else if (s[0] == '=') {
367
+ if (s[1] == '"') {
368
+ AddMatch(id + (t ? 110 : 118) * n, l + 3, l, matches);
369
+ } else if (s[1] == '\'') {
370
+ AddMatch(id + (t ? 119 : 120) * n, l + 3, l, matches);
371
+ }
372
+ }
373
+ }
374
+ }
375
+ }
376
+ if (max_length >= 6) {  // Hash() below loads 4 bytes starting at data[2]
377
+ // Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0"
378
+ if ((data[1] == ' ' &&
379
+ (data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
380
+ (data[0] == 0xc2 && data[1] == 0xa0)) {
381
+ key = Hash(&data[2]);
382
+ bucket = kStaticDictionaryBuckets[key];
383
+ int num = bucket & 0xff;
384
+ int offset = bucket >> 8;
385
+ for (int i = 0; i < num; ++i) {
386
+ const DictWord w = kStaticDictionaryWords[offset + i];
387
+ const int l = w.len;
388
+ const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
389
+ const int id = w.idx;
390
+ if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {
391
+ if (data[0] == 0xc2) {
392
+ AddMatch(id + 102 * n, l + 2, l, matches);
393
+ found_match = true;
394
+ } else if (l + 2 < max_length && data[l + 2] == ' ') {
395
+ int t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
396
+ AddMatch(id + t * n, l + 3, l, matches);
397
+ found_match = true;
398
+ }
399
+ }
400
+ }
401
+ }
402
+ }
403
+ if (max_length >= 9) {  // Hash() below loads 4 bytes starting at data[5]
404
+ // Transforms with prefixes " the " and ".com/"
405
+ if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
406
+ data[3] == 'e' && data[4] == ' ') ||
407
+ (data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
408
+ data[3] == 'm' && data[4] == '/')) {
409
+ key = Hash(&data[5]);
410
+ bucket = kStaticDictionaryBuckets[key];
411
+ int num = bucket & 0xff;
412
+ int offset = bucket >> 8;
413
+ for (int i = 0; i < num; ++i) {
414
+ const DictWord w = kStaticDictionaryWords[offset + i];
415
+ const int l = w.len;
416
+ const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
417
+ const int id = w.idx;
418
+ if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {
419
+ AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
420
+ found_match = true;
421
+ if (l + 5 < max_length) {
422
+ const uint8_t* s = &data[l + 5];
423
+ if (data[0] == ' ') {
424
+ if (l + 8 < max_length &&
425
+ s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
426
+ AddMatch(id + 62 * n, l + 9, l, matches);
427
+ if (l + 12 < max_length &&
428
+ s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
429
+ AddMatch(id + 73 * n, l + 13, l, matches);
430
+ }
431
+ }
432
+ }
433
+ }
434
+ }
435
+ }
436
+ }
437
+ }
438
+ return found_match;
439
+ }
440
+
441
+ } // namespace brotli