brotli 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/Gemfile +1 -0
- data/Rakefile +6 -1
- data/brotli.gemspec +1 -1
- data/docs/Brotli.html +485 -0
- data/docs/Brotli/Error.html +124 -0
- data/docs/_index.html +122 -0
- data/docs/class_list.html +51 -0
- data/docs/css/common.css +1 -0
- data/docs/css/full_list.css +58 -0
- data/docs/css/style.css +496 -0
- data/docs/file.README.html +127 -0
- data/docs/file_list.html +56 -0
- data/docs/frames.html +17 -0
- data/docs/index.html +127 -0
- data/docs/js/app.js +292 -0
- data/docs/js/full_list.js +216 -0
- data/docs/js/jquery.js +4 -0
- data/docs/method_list.html +67 -0
- data/docs/top-level-namespace.html +110 -0
- data/ext/brotli/brotli.c +20 -0
- data/lib/brotli/version.rb +1 -1
- data/vendor/brotli/c/common/constants.h +13 -6
- data/vendor/brotli/c/{dec → common}/context.h +182 -172
- data/vendor/brotli/c/common/dictionary.bin +0 -0
- data/vendor/brotli/c/common/dictionary.bin.br +0 -0
- data/vendor/brotli/c/common/dictionary.c +1 -1
- data/vendor/brotli/c/common/dictionary.h +4 -4
- data/vendor/brotli/c/common/platform.h +509 -0
- data/vendor/brotli/c/common/transform.c +235 -0
- data/vendor/brotli/c/common/transform.h +80 -0
- data/vendor/brotli/c/common/version.h +8 -1
- data/vendor/brotli/c/dec/bit_reader.c +1 -1
- data/vendor/brotli/c/dec/bit_reader.h +35 -86
- data/vendor/brotli/c/dec/decode.c +322 -205
- data/vendor/brotli/c/dec/huffman.c +35 -37
- data/vendor/brotli/c/dec/huffman.h +13 -9
- data/vendor/brotli/c/dec/prefix.h +3 -4
- data/vendor/brotli/c/dec/state.c +26 -34
- data/vendor/brotli/c/dec/state.h +34 -23
- data/vendor/brotli/c/enc/backward_references.c +25 -15
- data/vendor/brotli/c/enc/backward_references.h +5 -6
- data/vendor/brotli/c/enc/backward_references_hq.c +94 -68
- data/vendor/brotli/c/enc/backward_references_hq.h +22 -25
- data/vendor/brotli/c/enc/backward_references_inc.h +10 -10
- data/vendor/brotli/c/enc/bit_cost.c +1 -1
- data/vendor/brotli/c/enc/bit_cost.h +5 -5
- data/vendor/brotli/c/enc/block_encoder_inc.h +7 -6
- data/vendor/brotli/c/enc/block_splitter.c +2 -3
- data/vendor/brotli/c/enc/block_splitter.h +1 -1
- data/vendor/brotli/c/enc/block_splitter_inc.h +11 -11
- data/vendor/brotli/c/enc/brotli_bit_stream.c +102 -101
- data/vendor/brotli/c/enc/brotli_bit_stream.h +19 -38
- data/vendor/brotli/c/enc/cluster.c +1 -1
- data/vendor/brotli/c/enc/cluster.h +1 -1
- data/vendor/brotli/c/enc/command.h +40 -30
- data/vendor/brotli/c/enc/compress_fragment.c +21 -22
- data/vendor/brotli/c/enc/compress_fragment.h +1 -1
- data/vendor/brotli/c/enc/compress_fragment_two_pass.c +101 -68
- data/vendor/brotli/c/enc/compress_fragment_two_pass.h +1 -1
- data/vendor/brotli/c/enc/dictionary_hash.c +1 -1
- data/vendor/brotli/c/enc/encode.c +262 -162
- data/vendor/brotli/c/enc/encoder_dict.c +32 -0
- data/vendor/brotli/c/enc/encoder_dict.h +41 -0
- data/vendor/brotli/c/enc/entropy_encode.c +14 -14
- data/vendor/brotli/c/enc/entropy_encode.h +5 -5
- data/vendor/brotli/c/enc/entropy_encode_static.h +3 -3
- data/vendor/brotli/c/enc/fast_log.h +4 -2
- data/vendor/brotli/c/enc/find_match_length.h +3 -3
- data/vendor/brotli/c/enc/hash.h +75 -24
- data/vendor/brotli/c/enc/hash_composite_inc.h +133 -0
- data/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +9 -8
- data/vendor/brotli/c/enc/hash_longest_match64_inc.h +8 -8
- data/vendor/brotli/c/enc/hash_longest_match_inc.h +8 -8
- data/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +10 -9
- data/vendor/brotli/c/enc/hash_rolling_inc.h +215 -0
- data/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +9 -8
- data/vendor/brotli/c/enc/histogram.c +9 -6
- data/vendor/brotli/c/enc/histogram.h +6 -3
- data/vendor/brotli/c/enc/histogram_inc.h +1 -1
- data/vendor/brotli/c/enc/literal_cost.c +5 -5
- data/vendor/brotli/c/enc/literal_cost.h +2 -2
- data/vendor/brotli/c/enc/memory.c +5 -16
- data/vendor/brotli/c/enc/memory.h +40 -1
- data/vendor/brotli/c/enc/metablock.c +163 -25
- data/vendor/brotli/c/enc/metablock.h +13 -8
- data/vendor/brotli/c/enc/metablock_inc.h +1 -1
- data/vendor/brotli/c/enc/params.h +44 -0
- data/vendor/brotli/c/enc/prefix.h +3 -4
- data/vendor/brotli/c/enc/quality.h +29 -24
- data/vendor/brotli/c/enc/ringbuffer.h +15 -11
- data/vendor/brotli/c/enc/static_dict.c +49 -45
- data/vendor/brotli/c/enc/static_dict.h +4 -3
- data/vendor/brotli/c/enc/static_dict_lut.h +1 -1
- data/vendor/brotli/c/enc/utf8_util.c +20 -20
- data/vendor/brotli/c/enc/utf8_util.h +1 -1
- data/vendor/brotli/c/enc/write_bits.h +16 -21
- data/vendor/brotli/c/include/brotli/decode.h +13 -8
- data/vendor/brotli/c/include/brotli/encode.h +33 -8
- data/vendor/brotli/c/include/brotli/port.h +211 -83
- data/vendor/brotli/c/include/brotli/types.h +0 -7
- metadata +33 -12
- data/vendor/brotli/c/dec/port.h +0 -168
- data/vendor/brotli/c/dec/transform.h +0 -300
- data/vendor/brotli/c/enc/context.h +0 -184
- data/vendor/brotli/c/enc/port.h +0 -184
@@ -10,13 +10,13 @@
|
|
10
10
|
#ifndef BROTLI_ENC_METABLOCK_H_
|
11
11
|
#define BROTLI_ENC_METABLOCK_H_
|
12
12
|
|
13
|
+
#include "../common/context.h"
|
14
|
+
#include "../common/platform.h"
|
13
15
|
#include <brotli/types.h>
|
14
16
|
#include "./block_splitter.h"
|
15
17
|
#include "./command.h"
|
16
|
-
#include "./context.h"
|
17
18
|
#include "./histogram.h"
|
18
19
|
#include "./memory.h"
|
19
|
-
#include "./port.h"
|
20
20
|
#include "./quality.h"
|
21
21
|
|
22
22
|
#if defined(__cplusplus) || defined(c_plusplus)
|
@@ -67,15 +67,18 @@ static BROTLI_INLINE void DestroyMetaBlockSplit(
|
|
67
67
|
BROTLI_FREE(m, mb->distance_histograms);
|
68
68
|
}
|
69
69
|
|
70
|
-
/* Uses the slow shortest-path block splitter and does context clustering.
|
70
|
+
/* Uses the slow shortest-path block splitter and does context clustering.
|
71
|
+
The distance parameters are dynamically selected based on the commands
|
72
|
+
which get recomputed under the new distance parameters. The new distance
|
73
|
+
parameters are stored into *params. */
|
71
74
|
BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
|
72
75
|
const uint8_t* ringbuffer,
|
73
76
|
const size_t pos,
|
74
77
|
const size_t mask,
|
75
|
-
|
78
|
+
BrotliEncoderParams* params,
|
76
79
|
uint8_t prev_byte,
|
77
80
|
uint8_t prev_byte2,
|
78
|
-
|
81
|
+
Command* cmds,
|
79
82
|
size_t num_commands,
|
80
83
|
ContextType literal_context_mode,
|
81
84
|
MetaBlockSplit* mb);
|
@@ -85,14 +88,16 @@ BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
|
|
85
88
|
is the same for all block types. */
|
86
89
|
BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(
|
87
90
|
MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
|
88
|
-
uint8_t prev_byte, uint8_t prev_byte2,
|
91
|
+
uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
|
89
92
|
size_t num_contexts, const uint32_t* static_context_map,
|
90
93
|
const Command* commands, size_t n_commands, MetaBlockSplit* mb);
|
91
94
|
|
92
|
-
BROTLI_INTERNAL void BrotliOptimizeHistograms(
|
93
|
-
size_t distance_postfix_bits,
|
95
|
+
BROTLI_INTERNAL void BrotliOptimizeHistograms(uint32_t num_distance_codes,
|
94
96
|
MetaBlockSplit* mb);
|
95
97
|
|
98
|
+
BROTLI_INTERNAL void BrotliInitDistanceParams(BrotliEncoderParams* params,
|
99
|
+
uint32_t npostfix, uint32_t ndirect);
|
100
|
+
|
96
101
|
#if defined(__cplusplus) || defined(c_plusplus)
|
97
102
|
} /* extern "C" */
|
98
103
|
#endif
|
@@ -67,7 +67,7 @@ static void FN(InitBlockSplitter)(
|
|
67
67
|
split->lengths, split->lengths_alloc_size, max_num_blocks);
|
68
68
|
if (BROTLI_IS_OOM(m)) return;
|
69
69
|
self->split_->num_blocks = max_num_blocks;
|
70
|
-
|
70
|
+
BROTLI_DCHECK(*histograms == 0);
|
71
71
|
*histograms_size = max_num_types;
|
72
72
|
*histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size);
|
73
73
|
self->histograms_ = *histograms;
|
@@ -0,0 +1,44 @@
|
|
1
|
+
/* Copyright 2017 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Parameters for the Brotli encoder with chosen quality levels. */
|
8
|
+
|
9
|
+
#ifndef BROTLI_ENC_PARAMS_H_
|
10
|
+
#define BROTLI_ENC_PARAMS_H_
|
11
|
+
|
12
|
+
#include <brotli/encode.h>
|
13
|
+
#include "./encoder_dict.h"
|
14
|
+
|
15
|
+
typedef struct BrotliHasherParams {
|
16
|
+
int type;
|
17
|
+
int bucket_bits;
|
18
|
+
int block_bits;
|
19
|
+
int hash_len;
|
20
|
+
int num_last_distances_to_check;
|
21
|
+
} BrotliHasherParams;
|
22
|
+
|
23
|
+
typedef struct BrotliDistanceParams {
|
24
|
+
uint32_t distance_postfix_bits;
|
25
|
+
uint32_t num_direct_distance_codes;
|
26
|
+
uint32_t alphabet_size;
|
27
|
+
size_t max_distance;
|
28
|
+
} BrotliDistanceParams;
|
29
|
+
|
30
|
+
/* Encoding parameters */
|
31
|
+
typedef struct BrotliEncoderParams {
|
32
|
+
BrotliEncoderMode mode;
|
33
|
+
int quality;
|
34
|
+
int lgwin;
|
35
|
+
int lgblock;
|
36
|
+
size_t size_hint;
|
37
|
+
BROTLI_BOOL disable_literal_context_modeling;
|
38
|
+
BROTLI_BOOL large_window;
|
39
|
+
BrotliHasherParams hasher;
|
40
|
+
BrotliDistanceParams dist;
|
41
|
+
BrotliEncoderDictionary dictionary;
|
42
|
+
} BrotliEncoderParams;
|
43
|
+
|
44
|
+
#endif /* BROTLI_ENC_PARAMS_H_ */
|
@@ -11,7 +11,7 @@
|
|
11
11
|
#define BROTLI_ENC_PREFIX_H_
|
12
12
|
|
13
13
|
#include "../common/constants.h"
|
14
|
-
#include
|
14
|
+
#include "../common/platform.h"
|
15
15
|
#include <brotli/types.h>
|
16
16
|
#include "./fast_log.h"
|
17
17
|
|
@@ -39,11 +39,10 @@ static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
|
|
39
39
|
size_t prefix = (dist >> bucket) & 1;
|
40
40
|
size_t offset = (2 + prefix) << bucket;
|
41
41
|
size_t nbits = bucket - postfix_bits;
|
42
|
-
*code = (uint16_t)(
|
42
|
+
*code = (uint16_t)((nbits << 10) |
|
43
43
|
(BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes +
|
44
44
|
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
|
45
|
-
*extra_bits = (uint32_t)(
|
46
|
-
(nbits << 24) | ((dist - offset) >> postfix_bits));
|
45
|
+
*extra_bits = (uint32_t)((dist - offset) >> postfix_bits);
|
47
46
|
}
|
48
47
|
}
|
49
48
|
|
@@ -10,7 +10,9 @@
|
|
10
10
|
#ifndef BROTLI_ENC_QUALITY_H_
|
11
11
|
#define BROTLI_ENC_QUALITY_H_
|
12
12
|
|
13
|
+
#include "../common/platform.h"
|
13
14
|
#include <brotli/encode.h>
|
15
|
+
#include "./params.h"
|
14
16
|
|
15
17
|
#define FAST_ONE_PASS_COMPRESSION_QUALITY 0
|
16
18
|
#define FAST_TWO_PASS_COMPRESSION_QUALITY 1
|
@@ -19,36 +21,16 @@
|
|
19
21
|
|
20
22
|
#define MAX_QUALITY_FOR_STATIC_ENTROPY_CODES 2
|
21
23
|
#define MIN_QUALITY_FOR_BLOCK_SPLIT 4
|
24
|
+
#define MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS 4
|
22
25
|
#define MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS 4
|
23
26
|
#define MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH 5
|
24
27
|
#define MIN_QUALITY_FOR_CONTEXT_MODELING 5
|
25
28
|
#define MIN_QUALITY_FOR_HQ_CONTEXT_MODELING 7
|
26
29
|
#define MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING 10
|
27
|
-
/* Only for "font" mode. */
|
28
|
-
#define MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES 10
|
29
30
|
|
30
31
|
/* For quality below MIN_QUALITY_FOR_BLOCK_SPLIT there is no block splitting,
|
31
32
|
so we buffer at most this much literals and commands. */
|
32
|
-
#define MAX_NUM_DELAYED_SYMBOLS
|
33
|
-
|
34
|
-
typedef struct BrotliHasherParams {
|
35
|
-
int type;
|
36
|
-
int bucket_bits;
|
37
|
-
int block_bits;
|
38
|
-
int hash_len;
|
39
|
-
int num_last_distances_to_check;
|
40
|
-
} BrotliHasherParams;
|
41
|
-
|
42
|
-
/* Encoding parameters */
|
43
|
-
typedef struct BrotliEncoderParams {
|
44
|
-
BrotliEncoderMode mode;
|
45
|
-
int quality;
|
46
|
-
int lgwin;
|
47
|
-
int lgblock;
|
48
|
-
size_t size_hint;
|
49
|
-
BROTLI_BOOL disable_literal_context_modeling;
|
50
|
-
BrotliHasherParams hasher;
|
51
|
-
} BrotliEncoderParams;
|
33
|
+
#define MAX_NUM_DELAYED_SYMBOLS 0x2FFF
|
52
34
|
|
53
35
|
/* Returns hash-table size for quality levels 0 and 1. */
|
54
36
|
static BROTLI_INLINE size_t MaxHashTableSize(int quality) {
|
@@ -77,10 +59,15 @@ static BROTLI_INLINE size_t MaxZopfliCandidates(
|
|
77
59
|
static BROTLI_INLINE void SanitizeParams(BrotliEncoderParams* params) {
|
78
60
|
params->quality = BROTLI_MIN(int, BROTLI_MAX_QUALITY,
|
79
61
|
BROTLI_MAX(int, BROTLI_MIN_QUALITY, params->quality));
|
62
|
+
if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
|
63
|
+
params->large_window = BROTLI_FALSE;
|
64
|
+
}
|
80
65
|
if (params->lgwin < BROTLI_MIN_WINDOW_BITS) {
|
81
66
|
params->lgwin = BROTLI_MIN_WINDOW_BITS;
|
82
|
-
} else
|
83
|
-
params->
|
67
|
+
} else {
|
68
|
+
int max_lgwin = params->large_window ? BROTLI_LARGE_MAX_WINDOW_BITS :
|
69
|
+
BROTLI_MAX_WINDOW_BITS;
|
70
|
+
if (params->lgwin > max_lgwin) params->lgwin = max_lgwin;
|
84
71
|
}
|
85
72
|
}
|
86
73
|
|
@@ -155,6 +142,24 @@ static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params,
|
|
155
142
|
hparams->num_last_distances_to_check =
|
156
143
|
params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
|
157
144
|
}
|
145
|
+
|
146
|
+
if (params->lgwin > 24) {
|
147
|
+
/* Different hashers for large window brotli: not for qualities <= 2,
|
148
|
+
these are too fast for large window. Not for qualities >= 10: their
|
149
|
+
hasher already works well with large window. So the changes are:
|
150
|
+
H3 --> H35: for quality 3.
|
151
|
+
H54 --> H55: for quality 4 with size hint > 1MB
|
152
|
+
H6 --> H65: for qualities 5, 6, 7, 8, 9. */
|
153
|
+
if (hparams->type == 3) {
|
154
|
+
hparams->type = 35;
|
155
|
+
}
|
156
|
+
if (hparams->type == 54) {
|
157
|
+
hparams->type = 55;
|
158
|
+
}
|
159
|
+
if (hparams->type == 6) {
|
160
|
+
hparams->type = 65;
|
161
|
+
}
|
162
|
+
}
|
158
163
|
}
|
159
164
|
|
160
165
|
#endif /* BROTLI_ENC_QUALITY_H_ */
|
@@ -11,9 +11,9 @@
|
|
11
11
|
|
12
12
|
#include <string.h> /* memcpy */
|
13
13
|
|
14
|
+
#include "../common/platform.h"
|
14
15
|
#include <brotli/types.h>
|
15
16
|
#include "./memory.h"
|
16
|
-
#include "./port.h"
|
17
17
|
#include "./quality.h"
|
18
18
|
|
19
19
|
#if defined(__cplusplus) || defined(c_plusplus)
|
@@ -41,9 +41,9 @@ typedef struct RingBuffer {
|
|
41
41
|
uint32_t pos_;
|
42
42
|
/* The actual ring buffer containing the copy of the last two bytes, the data,
|
43
43
|
and the copy of the beginning as a tail. */
|
44
|
-
uint8_t
|
44
|
+
uint8_t* data_;
|
45
45
|
/* The start of the ring-buffer. */
|
46
|
-
uint8_t
|
46
|
+
uint8_t* buffer_;
|
47
47
|
} RingBuffer;
|
48
48
|
|
49
49
|
static BROTLI_INLINE void RingBufferInit(RingBuffer* rb) {
|
@@ -91,7 +91,7 @@ static BROTLI_INLINE void RingBufferInitBuffer(
|
|
91
91
|
}
|
92
92
|
|
93
93
|
static BROTLI_INLINE void RingBufferWriteTail(
|
94
|
-
const uint8_t
|
94
|
+
const uint8_t* bytes, size_t n, RingBuffer* rb) {
|
95
95
|
const size_t masked_pos = rb->pos_ & rb->mask_;
|
96
96
|
if (BROTLI_PREDICT_FALSE(masked_pos < rb->tail_size_)) {
|
97
97
|
/* Just fill the tail buffer with the beginning data. */
|
@@ -103,7 +103,7 @@ static BROTLI_INLINE void RingBufferWriteTail(
|
|
103
103
|
|
104
104
|
/* Push bytes into the ring buffer. */
|
105
105
|
static BROTLI_INLINE void RingBufferWrite(
|
106
|
-
MemoryManager* m, const uint8_t
|
106
|
+
MemoryManager* m, const uint8_t* bytes, size_t n, RingBuffer* rb) {
|
107
107
|
if (rb->pos_ == 0 && n < rb->tail_size_) {
|
108
108
|
/* Special case for the first write: to process the first block, we don't
|
109
109
|
need to allocate the whole ring-buffer and we don't need the tail
|
@@ -144,12 +144,16 @@ static BROTLI_INLINE void RingBufferWrite(
|
|
144
144
|
n - (rb->size_ - masked_pos));
|
145
145
|
}
|
146
146
|
}
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
rb->pos_ = (rb->pos_ &
|
147
|
+
{
|
148
|
+
BROTLI_BOOL not_first_lap = (rb->pos_ & (1u << 31)) != 0;
|
149
|
+
uint32_t rb_pos_mask = (1u << 31) - 1;
|
150
|
+
rb->buffer_[-2] = rb->buffer_[rb->size_ - 2];
|
151
|
+
rb->buffer_[-1] = rb->buffer_[rb->size_ - 1];
|
152
|
+
rb->pos_ = (rb->pos_ & rb_pos_mask) + (uint32_t)(n & rb_pos_mask);
|
153
|
+
if (not_first_lap) {
|
154
|
+
/* Wrap, but preserve not-a-first-lap feature. */
|
155
|
+
rb->pos_ |= 1u << 31;
|
156
|
+
}
|
153
157
|
}
|
154
158
|
}
|
155
159
|
|
@@ -7,21 +7,17 @@
|
|
7
7
|
#include "./static_dict.h"
|
8
8
|
|
9
9
|
#include "../common/dictionary.h"
|
10
|
+
#include "../common/platform.h"
|
11
|
+
#include "../common/transform.h"
|
12
|
+
#include "./encoder_dict.h"
|
10
13
|
#include "./find_match_length.h"
|
11
|
-
#include "./port.h"
|
12
|
-
#include "./static_dict_lut.h"
|
13
14
|
|
14
15
|
#if defined(__cplusplus) || defined(c_plusplus)
|
15
16
|
extern "C" {
|
16
17
|
#endif
|
17
18
|
|
18
|
-
static const uint8_t
|
19
|
-
|
20
|
-
0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
|
21
|
-
};
|
22
|
-
|
23
|
-
static BROTLI_INLINE uint32_t Hash(const uint8_t *data) {
|
24
|
-
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
|
19
|
+
static BROTLI_INLINE uint32_t Hash(const uint8_t* data) {
|
20
|
+
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32;
|
25
21
|
/* The higher bits contain more mixture from the multiplication,
|
26
22
|
so we take our results from there. */
|
27
23
|
return h >> (32 - kDictNumBits);
|
@@ -79,32 +75,33 @@ static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
|
|
79
75
|
}
|
80
76
|
|
81
77
|
BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
82
|
-
const
|
83
|
-
size_t max_length, uint32_t* matches) {
|
78
|
+
const BrotliEncoderDictionary* dictionary, const uint8_t* data,
|
79
|
+
size_t min_length, size_t max_length, uint32_t* matches) {
|
84
80
|
BROTLI_BOOL has_found_match = BROTLI_FALSE;
|
85
81
|
{
|
86
|
-
size_t offset =
|
82
|
+
size_t offset = dictionary->buckets[Hash(data)];
|
87
83
|
BROTLI_BOOL end = !offset;
|
88
84
|
while (!end) {
|
89
|
-
DictWord w =
|
85
|
+
DictWord w = dictionary->dict_words[offset++];
|
90
86
|
const size_t l = w.len & 0x1F;
|
91
|
-
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
87
|
+
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
92
88
|
const size_t id = w.idx;
|
93
89
|
end = !!(w.len & 0x80);
|
94
90
|
w.len = (uint8_t)l;
|
95
91
|
if (w.transform == 0) {
|
96
92
|
const size_t matchlen =
|
97
|
-
DictMatchLength(dictionary, data, id, l, max_length);
|
93
|
+
DictMatchLength(dictionary->words, data, id, l, max_length);
|
98
94
|
const uint8_t* s;
|
99
95
|
size_t minlen;
|
100
96
|
size_t maxlen;
|
101
97
|
size_t len;
|
102
|
-
/* Transform "" +
|
98
|
+
/* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
|
103
99
|
if (matchlen == l) {
|
104
100
|
AddMatch(id, l, l, matches);
|
105
101
|
has_found_match = BROTLI_TRUE;
|
106
102
|
}
|
107
|
-
/* Transforms "" +
|
103
|
+
/* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
|
104
|
+
"" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
|
108
105
|
if (matchlen >= l - 1) {
|
109
106
|
AddMatch(id + 12 * n, l - 1, l, matches);
|
110
107
|
if (l + 2 < max_length &&
|
@@ -114,19 +111,22 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
114
111
|
}
|
115
112
|
has_found_match = BROTLI_TRUE;
|
116
113
|
}
|
117
|
-
/* Transform "" +
|
114
|
+
/* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
|
118
115
|
minlen = min_length;
|
119
116
|
if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
|
120
117
|
maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
|
121
118
|
for (len = minlen; len <= maxlen; ++len) {
|
122
|
-
|
119
|
+
size_t cut = l - len;
|
120
|
+
size_t transform_id = (cut << 2) +
|
121
|
+
(size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
|
122
|
+
AddMatch(id + transform_id * n, len, l, matches);
|
123
123
|
has_found_match = BROTLI_TRUE;
|
124
124
|
}
|
125
125
|
if (matchlen < l || l + 6 >= max_length) {
|
126
126
|
continue;
|
127
127
|
}
|
128
128
|
s = &data[l];
|
129
|
-
/* Transforms "" +
|
129
|
+
/* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
|
130
130
|
if (s[0] == ' ') {
|
131
131
|
AddMatch(id + n, l + 1, l, matches);
|
132
132
|
if (s[1] == 'a') {
|
@@ -273,12 +273,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
273
273
|
}
|
274
274
|
}
|
275
275
|
} else {
|
276
|
-
/* Set is_all_caps=0 for
|
277
|
-
is_all_caps=1 otherwise (
|
276
|
+
/* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
|
277
|
+
is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
|
278
|
+
transform. */
|
278
279
|
const BROTLI_BOOL is_all_caps =
|
279
|
-
TO_BROTLI_BOOL(w.transform !=
|
280
|
+
TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
|
280
281
|
const uint8_t* s;
|
281
|
-
if (!IsMatch(dictionary, w, data, max_length)) {
|
282
|
+
if (!IsMatch(dictionary->words, w, data, max_length)) {
|
282
283
|
continue;
|
283
284
|
}
|
284
285
|
/* Transform "" + kUppercase{First,All} + "" */
|
@@ -323,27 +324,29 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
323
324
|
/* Transforms with prefixes " " and "." */
|
324
325
|
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
|
325
326
|
BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
|
326
|
-
size_t offset =
|
327
|
+
size_t offset = dictionary->buckets[Hash(&data[1])];
|
327
328
|
BROTLI_BOOL end = !offset;
|
328
329
|
while (!end) {
|
329
|
-
DictWord w =
|
330
|
+
DictWord w = dictionary->dict_words[offset++];
|
330
331
|
const size_t l = w.len & 0x1F;
|
331
|
-
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
332
|
+
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
332
333
|
const size_t id = w.idx;
|
333
334
|
end = !!(w.len & 0x80);
|
334
335
|
w.len = (uint8_t)l;
|
335
336
|
if (w.transform == 0) {
|
336
337
|
const uint8_t* s;
|
337
|
-
if (!IsMatch(dictionary, w, &data[1], max_length - 1)) {
|
338
|
+
if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
|
338
339
|
continue;
|
339
340
|
}
|
340
|
-
/* Transforms " " +
|
341
|
+
/* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
|
342
|
+
"." + BROTLI_TRANSFORM_IDENTITY + "" */
|
341
343
|
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
|
342
344
|
has_found_match = BROTLI_TRUE;
|
343
345
|
if (l + 2 >= max_length) {
|
344
346
|
continue;
|
345
347
|
}
|
346
|
-
/* Transforms " " +
|
348
|
+
/* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
|
349
|
+
"." + BROTLI_TRANSFORM_IDENTITY + <suffix>
|
347
350
|
*/
|
348
351
|
s = &data[l + 1];
|
349
352
|
if (s[0] == ' ') {
|
@@ -370,12 +373,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
370
373
|
}
|
371
374
|
}
|
372
375
|
} else if (is_space) {
|
373
|
-
/* Set is_all_caps=0 for
|
374
|
-
is_all_caps=1 otherwise (
|
376
|
+
/* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
|
377
|
+
is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
|
378
|
+
transform. */
|
375
379
|
const BROTLI_BOOL is_all_caps =
|
376
|
-
TO_BROTLI_BOOL(w.transform !=
|
380
|
+
TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
|
377
381
|
const uint8_t* s;
|
378
|
-
if (!IsMatch(dictionary, w, &data[1], max_length - 1)) {
|
382
|
+
if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
|
379
383
|
continue;
|
380
384
|
}
|
381
385
|
/* Transforms " " + kUppercase{First,All} + "" */
|
@@ -411,22 +415,22 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
411
415
|
}
|
412
416
|
}
|
413
417
|
if (max_length >= 6) {
|
414
|
-
/* Transforms with prefixes "e ", "s ", ", " and "\
|
418
|
+
/* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
|
415
419
|
if ((data[1] == ' ' &&
|
416
420
|
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
|
417
|
-
(data[0] ==
|
418
|
-
size_t offset =
|
421
|
+
(data[0] == 0xC2 && data[1] == 0xA0)) {
|
422
|
+
size_t offset = dictionary->buckets[Hash(&data[2])];
|
419
423
|
BROTLI_BOOL end = !offset;
|
420
424
|
while (!end) {
|
421
|
-
DictWord w =
|
425
|
+
DictWord w = dictionary->dict_words[offset++];
|
422
426
|
const size_t l = w.len & 0x1F;
|
423
|
-
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
427
|
+
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
424
428
|
const size_t id = w.idx;
|
425
429
|
end = !!(w.len & 0x80);
|
426
430
|
w.len = (uint8_t)l;
|
427
431
|
if (w.transform == 0 &&
|
428
|
-
IsMatch(dictionary, w, &data[2], max_length - 2)) {
|
429
|
-
if (data[0] ==
|
432
|
+
IsMatch(dictionary->words, w, &data[2], max_length - 2)) {
|
433
|
+
if (data[0] == 0xC2) {
|
430
434
|
AddMatch(id + 102 * n, l + 2, l, matches);
|
431
435
|
has_found_match = BROTLI_TRUE;
|
432
436
|
} else if (l + 2 < max_length && data[l + 2] == ' ') {
|
@@ -444,17 +448,17 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
444
448
|
data[3] == 'e' && data[4] == ' ') ||
|
445
449
|
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
|
446
450
|
data[3] == 'm' && data[4] == '/')) {
|
447
|
-
size_t offset =
|
451
|
+
size_t offset = dictionary->buckets[Hash(&data[5])];
|
448
452
|
BROTLI_BOOL end = !offset;
|
449
453
|
while (!end) {
|
450
|
-
DictWord w =
|
454
|
+
DictWord w = dictionary->dict_words[offset++];
|
451
455
|
const size_t l = w.len & 0x1F;
|
452
|
-
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
456
|
+
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
453
457
|
const size_t id = w.idx;
|
454
458
|
end = !!(w.len & 0x80);
|
455
459
|
w.len = (uint8_t)l;
|
456
460
|
if (w.transform == 0 &&
|
457
|
-
IsMatch(dictionary, w, &data[5], max_length - 5)) {
|
461
|
+
IsMatch(dictionary->words, w, &data[5], max_length - 5)) {
|
458
462
|
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
|
459
463
|
has_found_match = BROTLI_TRUE;
|
460
464
|
if (l + 5 < max_length) {
|