brotli 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/Gemfile +1 -0
- data/Rakefile +6 -1
- data/brotli.gemspec +1 -1
- data/docs/Brotli.html +485 -0
- data/docs/Brotli/Error.html +124 -0
- data/docs/_index.html +122 -0
- data/docs/class_list.html +51 -0
- data/docs/css/common.css +1 -0
- data/docs/css/full_list.css +58 -0
- data/docs/css/style.css +496 -0
- data/docs/file.README.html +127 -0
- data/docs/file_list.html +56 -0
- data/docs/frames.html +17 -0
- data/docs/index.html +127 -0
- data/docs/js/app.js +292 -0
- data/docs/js/full_list.js +216 -0
- data/docs/js/jquery.js +4 -0
- data/docs/method_list.html +67 -0
- data/docs/top-level-namespace.html +110 -0
- data/ext/brotli/brotli.c +20 -0
- data/lib/brotli/version.rb +1 -1
- data/vendor/brotli/c/common/constants.h +13 -6
- data/vendor/brotli/c/{dec → common}/context.h +182 -172
- data/vendor/brotli/c/common/dictionary.bin +0 -0
- data/vendor/brotli/c/common/dictionary.bin.br +0 -0
- data/vendor/brotli/c/common/dictionary.c +1 -1
- data/vendor/brotli/c/common/dictionary.h +4 -4
- data/vendor/brotli/c/common/platform.h +509 -0
- data/vendor/brotli/c/common/transform.c +235 -0
- data/vendor/brotli/c/common/transform.h +80 -0
- data/vendor/brotli/c/common/version.h +8 -1
- data/vendor/brotli/c/dec/bit_reader.c +1 -1
- data/vendor/brotli/c/dec/bit_reader.h +35 -86
- data/vendor/brotli/c/dec/decode.c +322 -205
- data/vendor/brotli/c/dec/huffman.c +35 -37
- data/vendor/brotli/c/dec/huffman.h +13 -9
- data/vendor/brotli/c/dec/prefix.h +3 -4
- data/vendor/brotli/c/dec/state.c +26 -34
- data/vendor/brotli/c/dec/state.h +34 -23
- data/vendor/brotli/c/enc/backward_references.c +25 -15
- data/vendor/brotli/c/enc/backward_references.h +5 -6
- data/vendor/brotli/c/enc/backward_references_hq.c +94 -68
- data/vendor/brotli/c/enc/backward_references_hq.h +22 -25
- data/vendor/brotli/c/enc/backward_references_inc.h +10 -10
- data/vendor/brotli/c/enc/bit_cost.c +1 -1
- data/vendor/brotli/c/enc/bit_cost.h +5 -5
- data/vendor/brotli/c/enc/block_encoder_inc.h +7 -6
- data/vendor/brotli/c/enc/block_splitter.c +2 -3
- data/vendor/brotli/c/enc/block_splitter.h +1 -1
- data/vendor/brotli/c/enc/block_splitter_inc.h +11 -11
- data/vendor/brotli/c/enc/brotli_bit_stream.c +102 -101
- data/vendor/brotli/c/enc/brotli_bit_stream.h +19 -38
- data/vendor/brotli/c/enc/cluster.c +1 -1
- data/vendor/brotli/c/enc/cluster.h +1 -1
- data/vendor/brotli/c/enc/command.h +40 -30
- data/vendor/brotli/c/enc/compress_fragment.c +21 -22
- data/vendor/brotli/c/enc/compress_fragment.h +1 -1
- data/vendor/brotli/c/enc/compress_fragment_two_pass.c +101 -68
- data/vendor/brotli/c/enc/compress_fragment_two_pass.h +1 -1
- data/vendor/brotli/c/enc/dictionary_hash.c +1 -1
- data/vendor/brotli/c/enc/encode.c +262 -162
- data/vendor/brotli/c/enc/encoder_dict.c +32 -0
- data/vendor/brotli/c/enc/encoder_dict.h +41 -0
- data/vendor/brotli/c/enc/entropy_encode.c +14 -14
- data/vendor/brotli/c/enc/entropy_encode.h +5 -5
- data/vendor/brotli/c/enc/entropy_encode_static.h +3 -3
- data/vendor/brotli/c/enc/fast_log.h +4 -2
- data/vendor/brotli/c/enc/find_match_length.h +3 -3
- data/vendor/brotli/c/enc/hash.h +75 -24
- data/vendor/brotli/c/enc/hash_composite_inc.h +133 -0
- data/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +9 -8
- data/vendor/brotli/c/enc/hash_longest_match64_inc.h +8 -8
- data/vendor/brotli/c/enc/hash_longest_match_inc.h +8 -8
- data/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +10 -9
- data/vendor/brotli/c/enc/hash_rolling_inc.h +215 -0
- data/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +9 -8
- data/vendor/brotli/c/enc/histogram.c +9 -6
- data/vendor/brotli/c/enc/histogram.h +6 -3
- data/vendor/brotli/c/enc/histogram_inc.h +1 -1
- data/vendor/brotli/c/enc/literal_cost.c +5 -5
- data/vendor/brotli/c/enc/literal_cost.h +2 -2
- data/vendor/brotli/c/enc/memory.c +5 -16
- data/vendor/brotli/c/enc/memory.h +40 -1
- data/vendor/brotli/c/enc/metablock.c +163 -25
- data/vendor/brotli/c/enc/metablock.h +13 -8
- data/vendor/brotli/c/enc/metablock_inc.h +1 -1
- data/vendor/brotli/c/enc/params.h +44 -0
- data/vendor/brotli/c/enc/prefix.h +3 -4
- data/vendor/brotli/c/enc/quality.h +29 -24
- data/vendor/brotli/c/enc/ringbuffer.h +15 -11
- data/vendor/brotli/c/enc/static_dict.c +49 -45
- data/vendor/brotli/c/enc/static_dict.h +4 -3
- data/vendor/brotli/c/enc/static_dict_lut.h +1 -1
- data/vendor/brotli/c/enc/utf8_util.c +20 -20
- data/vendor/brotli/c/enc/utf8_util.h +1 -1
- data/vendor/brotli/c/enc/write_bits.h +16 -21
- data/vendor/brotli/c/include/brotli/decode.h +13 -8
- data/vendor/brotli/c/include/brotli/encode.h +33 -8
- data/vendor/brotli/c/include/brotli/port.h +211 -83
- data/vendor/brotli/c/include/brotli/types.h +0 -7
- metadata +33 -12
- data/vendor/brotli/c/dec/port.h +0 -168
- data/vendor/brotli/c/dec/transform.h +0 -300
- data/vendor/brotli/c/enc/context.h +0 -184
- data/vendor/brotli/c/enc/port.h +0 -184
|
@@ -10,13 +10,13 @@
|
|
|
10
10
|
#ifndef BROTLI_ENC_METABLOCK_H_
|
|
11
11
|
#define BROTLI_ENC_METABLOCK_H_
|
|
12
12
|
|
|
13
|
+
#include "../common/context.h"
|
|
14
|
+
#include "../common/platform.h"
|
|
13
15
|
#include <brotli/types.h>
|
|
14
16
|
#include "./block_splitter.h"
|
|
15
17
|
#include "./command.h"
|
|
16
|
-
#include "./context.h"
|
|
17
18
|
#include "./histogram.h"
|
|
18
19
|
#include "./memory.h"
|
|
19
|
-
#include "./port.h"
|
|
20
20
|
#include "./quality.h"
|
|
21
21
|
|
|
22
22
|
#if defined(__cplusplus) || defined(c_plusplus)
|
|
@@ -67,15 +67,18 @@ static BROTLI_INLINE void DestroyMetaBlockSplit(
|
|
|
67
67
|
BROTLI_FREE(m, mb->distance_histograms);
|
|
68
68
|
}
|
|
69
69
|
|
|
70
|
-
/* Uses the slow shortest-path block splitter and does context clustering.
|
|
70
|
+
/* Uses the slow shortest-path block splitter and does context clustering.
|
|
71
|
+
The distance parameters are dynamically selected based on the commands
|
|
72
|
+
which get recomputed under the new distance parameters. The new distance
|
|
73
|
+
parameters are stored into *params. */
|
|
71
74
|
BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
|
|
72
75
|
const uint8_t* ringbuffer,
|
|
73
76
|
const size_t pos,
|
|
74
77
|
const size_t mask,
|
|
75
|
-
|
|
78
|
+
BrotliEncoderParams* params,
|
|
76
79
|
uint8_t prev_byte,
|
|
77
80
|
uint8_t prev_byte2,
|
|
78
|
-
|
|
81
|
+
Command* cmds,
|
|
79
82
|
size_t num_commands,
|
|
80
83
|
ContextType literal_context_mode,
|
|
81
84
|
MetaBlockSplit* mb);
|
|
@@ -85,14 +88,16 @@ BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
|
|
|
85
88
|
is the same for all block types. */
|
|
86
89
|
BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(
|
|
87
90
|
MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
|
|
88
|
-
uint8_t prev_byte, uint8_t prev_byte2,
|
|
91
|
+
uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
|
|
89
92
|
size_t num_contexts, const uint32_t* static_context_map,
|
|
90
93
|
const Command* commands, size_t n_commands, MetaBlockSplit* mb);
|
|
91
94
|
|
|
92
|
-
BROTLI_INTERNAL void BrotliOptimizeHistograms(
|
|
93
|
-
size_t distance_postfix_bits,
|
|
95
|
+
BROTLI_INTERNAL void BrotliOptimizeHistograms(uint32_t num_distance_codes,
|
|
94
96
|
MetaBlockSplit* mb);
|
|
95
97
|
|
|
98
|
+
BROTLI_INTERNAL void BrotliInitDistanceParams(BrotliEncoderParams* params,
|
|
99
|
+
uint32_t npostfix, uint32_t ndirect);
|
|
100
|
+
|
|
96
101
|
#if defined(__cplusplus) || defined(c_plusplus)
|
|
97
102
|
} /* extern "C" */
|
|
98
103
|
#endif
|
|
@@ -67,7 +67,7 @@ static void FN(InitBlockSplitter)(
|
|
|
67
67
|
split->lengths, split->lengths_alloc_size, max_num_blocks);
|
|
68
68
|
if (BROTLI_IS_OOM(m)) return;
|
|
69
69
|
self->split_->num_blocks = max_num_blocks;
|
|
70
|
-
|
|
70
|
+
BROTLI_DCHECK(*histograms == 0);
|
|
71
71
|
*histograms_size = max_num_types;
|
|
72
72
|
*histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size);
|
|
73
73
|
self->histograms_ = *histograms;
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/* Copyright 2017 Google Inc. All Rights Reserved.
|
|
2
|
+
|
|
3
|
+
Distributed under MIT license.
|
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/* Parameters for the Brotli encoder with chosen quality levels. */
|
|
8
|
+
|
|
9
|
+
#ifndef BROTLI_ENC_PARAMS_H_
|
|
10
|
+
#define BROTLI_ENC_PARAMS_H_
|
|
11
|
+
|
|
12
|
+
#include <brotli/encode.h>
|
|
13
|
+
#include "./encoder_dict.h"
|
|
14
|
+
|
|
15
|
+
typedef struct BrotliHasherParams {
|
|
16
|
+
int type;
|
|
17
|
+
int bucket_bits;
|
|
18
|
+
int block_bits;
|
|
19
|
+
int hash_len;
|
|
20
|
+
int num_last_distances_to_check;
|
|
21
|
+
} BrotliHasherParams;
|
|
22
|
+
|
|
23
|
+
typedef struct BrotliDistanceParams {
|
|
24
|
+
uint32_t distance_postfix_bits;
|
|
25
|
+
uint32_t num_direct_distance_codes;
|
|
26
|
+
uint32_t alphabet_size;
|
|
27
|
+
size_t max_distance;
|
|
28
|
+
} BrotliDistanceParams;
|
|
29
|
+
|
|
30
|
+
/* Encoding parameters */
|
|
31
|
+
typedef struct BrotliEncoderParams {
|
|
32
|
+
BrotliEncoderMode mode;
|
|
33
|
+
int quality;
|
|
34
|
+
int lgwin;
|
|
35
|
+
int lgblock;
|
|
36
|
+
size_t size_hint;
|
|
37
|
+
BROTLI_BOOL disable_literal_context_modeling;
|
|
38
|
+
BROTLI_BOOL large_window;
|
|
39
|
+
BrotliHasherParams hasher;
|
|
40
|
+
BrotliDistanceParams dist;
|
|
41
|
+
BrotliEncoderDictionary dictionary;
|
|
42
|
+
} BrotliEncoderParams;
|
|
43
|
+
|
|
44
|
+
#endif /* BROTLI_ENC_PARAMS_H_ */
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
#define BROTLI_ENC_PREFIX_H_
|
|
12
12
|
|
|
13
13
|
#include "../common/constants.h"
|
|
14
|
-
#include
|
|
14
|
+
#include "../common/platform.h"
|
|
15
15
|
#include <brotli/types.h>
|
|
16
16
|
#include "./fast_log.h"
|
|
17
17
|
|
|
@@ -39,11 +39,10 @@ static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
|
|
|
39
39
|
size_t prefix = (dist >> bucket) & 1;
|
|
40
40
|
size_t offset = (2 + prefix) << bucket;
|
|
41
41
|
size_t nbits = bucket - postfix_bits;
|
|
42
|
-
*code = (uint16_t)(
|
|
42
|
+
*code = (uint16_t)((nbits << 10) |
|
|
43
43
|
(BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes +
|
|
44
44
|
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
|
|
45
|
-
*extra_bits = (uint32_t)(
|
|
46
|
-
(nbits << 24) | ((dist - offset) >> postfix_bits));
|
|
45
|
+
*extra_bits = (uint32_t)((dist - offset) >> postfix_bits);
|
|
47
46
|
}
|
|
48
47
|
}
|
|
49
48
|
|
|
@@ -10,7 +10,9 @@
|
|
|
10
10
|
#ifndef BROTLI_ENC_QUALITY_H_
|
|
11
11
|
#define BROTLI_ENC_QUALITY_H_
|
|
12
12
|
|
|
13
|
+
#include "../common/platform.h"
|
|
13
14
|
#include <brotli/encode.h>
|
|
15
|
+
#include "./params.h"
|
|
14
16
|
|
|
15
17
|
#define FAST_ONE_PASS_COMPRESSION_QUALITY 0
|
|
16
18
|
#define FAST_TWO_PASS_COMPRESSION_QUALITY 1
|
|
@@ -19,36 +21,16 @@
|
|
|
19
21
|
|
|
20
22
|
#define MAX_QUALITY_FOR_STATIC_ENTROPY_CODES 2
|
|
21
23
|
#define MIN_QUALITY_FOR_BLOCK_SPLIT 4
|
|
24
|
+
#define MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS 4
|
|
22
25
|
#define MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS 4
|
|
23
26
|
#define MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH 5
|
|
24
27
|
#define MIN_QUALITY_FOR_CONTEXT_MODELING 5
|
|
25
28
|
#define MIN_QUALITY_FOR_HQ_CONTEXT_MODELING 7
|
|
26
29
|
#define MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING 10
|
|
27
|
-
/* Only for "font" mode. */
|
|
28
|
-
#define MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES 10
|
|
29
30
|
|
|
30
31
|
/* For quality below MIN_QUALITY_FOR_BLOCK_SPLIT there is no block splitting,
|
|
31
32
|
so we buffer at most this much literals and commands. */
|
|
32
|
-
#define MAX_NUM_DELAYED_SYMBOLS
|
|
33
|
-
|
|
34
|
-
typedef struct BrotliHasherParams {
|
|
35
|
-
int type;
|
|
36
|
-
int bucket_bits;
|
|
37
|
-
int block_bits;
|
|
38
|
-
int hash_len;
|
|
39
|
-
int num_last_distances_to_check;
|
|
40
|
-
} BrotliHasherParams;
|
|
41
|
-
|
|
42
|
-
/* Encoding parameters */
|
|
43
|
-
typedef struct BrotliEncoderParams {
|
|
44
|
-
BrotliEncoderMode mode;
|
|
45
|
-
int quality;
|
|
46
|
-
int lgwin;
|
|
47
|
-
int lgblock;
|
|
48
|
-
size_t size_hint;
|
|
49
|
-
BROTLI_BOOL disable_literal_context_modeling;
|
|
50
|
-
BrotliHasherParams hasher;
|
|
51
|
-
} BrotliEncoderParams;
|
|
33
|
+
#define MAX_NUM_DELAYED_SYMBOLS 0x2FFF
|
|
52
34
|
|
|
53
35
|
/* Returns hash-table size for quality levels 0 and 1. */
|
|
54
36
|
static BROTLI_INLINE size_t MaxHashTableSize(int quality) {
|
|
@@ -77,10 +59,15 @@ static BROTLI_INLINE size_t MaxZopfliCandidates(
|
|
|
77
59
|
static BROTLI_INLINE void SanitizeParams(BrotliEncoderParams* params) {
|
|
78
60
|
params->quality = BROTLI_MIN(int, BROTLI_MAX_QUALITY,
|
|
79
61
|
BROTLI_MAX(int, BROTLI_MIN_QUALITY, params->quality));
|
|
62
|
+
if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
|
|
63
|
+
params->large_window = BROTLI_FALSE;
|
|
64
|
+
}
|
|
80
65
|
if (params->lgwin < BROTLI_MIN_WINDOW_BITS) {
|
|
81
66
|
params->lgwin = BROTLI_MIN_WINDOW_BITS;
|
|
82
|
-
} else
|
|
83
|
-
params->
|
|
67
|
+
} else {
|
|
68
|
+
int max_lgwin = params->large_window ? BROTLI_LARGE_MAX_WINDOW_BITS :
|
|
69
|
+
BROTLI_MAX_WINDOW_BITS;
|
|
70
|
+
if (params->lgwin > max_lgwin) params->lgwin = max_lgwin;
|
|
84
71
|
}
|
|
85
72
|
}
|
|
86
73
|
|
|
@@ -155,6 +142,24 @@ static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params,
|
|
|
155
142
|
hparams->num_last_distances_to_check =
|
|
156
143
|
params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
|
|
157
144
|
}
|
|
145
|
+
|
|
146
|
+
if (params->lgwin > 24) {
|
|
147
|
+
/* Different hashers for large window brotli: not for qualities <= 2,
|
|
148
|
+
these are too fast for large window. Not for qualities >= 10: their
|
|
149
|
+
hasher already works well with large window. So the changes are:
|
|
150
|
+
H3 --> H35: for quality 3.
|
|
151
|
+
H54 --> H55: for quality 4 with size hint > 1MB
|
|
152
|
+
H6 --> H65: for qualities 5, 6, 7, 8, 9. */
|
|
153
|
+
if (hparams->type == 3) {
|
|
154
|
+
hparams->type = 35;
|
|
155
|
+
}
|
|
156
|
+
if (hparams->type == 54) {
|
|
157
|
+
hparams->type = 55;
|
|
158
|
+
}
|
|
159
|
+
if (hparams->type == 6) {
|
|
160
|
+
hparams->type = 65;
|
|
161
|
+
}
|
|
162
|
+
}
|
|
158
163
|
}
|
|
159
164
|
|
|
160
165
|
#endif /* BROTLI_ENC_QUALITY_H_ */
|
|
@@ -11,9 +11,9 @@
|
|
|
11
11
|
|
|
12
12
|
#include <string.h> /* memcpy */
|
|
13
13
|
|
|
14
|
+
#include "../common/platform.h"
|
|
14
15
|
#include <brotli/types.h>
|
|
15
16
|
#include "./memory.h"
|
|
16
|
-
#include "./port.h"
|
|
17
17
|
#include "./quality.h"
|
|
18
18
|
|
|
19
19
|
#if defined(__cplusplus) || defined(c_plusplus)
|
|
@@ -41,9 +41,9 @@ typedef struct RingBuffer {
|
|
|
41
41
|
uint32_t pos_;
|
|
42
42
|
/* The actual ring buffer containing the copy of the last two bytes, the data,
|
|
43
43
|
and the copy of the beginning as a tail. */
|
|
44
|
-
uint8_t
|
|
44
|
+
uint8_t* data_;
|
|
45
45
|
/* The start of the ring-buffer. */
|
|
46
|
-
uint8_t
|
|
46
|
+
uint8_t* buffer_;
|
|
47
47
|
} RingBuffer;
|
|
48
48
|
|
|
49
49
|
static BROTLI_INLINE void RingBufferInit(RingBuffer* rb) {
|
|
@@ -91,7 +91,7 @@ static BROTLI_INLINE void RingBufferInitBuffer(
|
|
|
91
91
|
}
|
|
92
92
|
|
|
93
93
|
static BROTLI_INLINE void RingBufferWriteTail(
|
|
94
|
-
const uint8_t
|
|
94
|
+
const uint8_t* bytes, size_t n, RingBuffer* rb) {
|
|
95
95
|
const size_t masked_pos = rb->pos_ & rb->mask_;
|
|
96
96
|
if (BROTLI_PREDICT_FALSE(masked_pos < rb->tail_size_)) {
|
|
97
97
|
/* Just fill the tail buffer with the beginning data. */
|
|
@@ -103,7 +103,7 @@ static BROTLI_INLINE void RingBufferWriteTail(
|
|
|
103
103
|
|
|
104
104
|
/* Push bytes into the ring buffer. */
|
|
105
105
|
static BROTLI_INLINE void RingBufferWrite(
|
|
106
|
-
MemoryManager* m, const uint8_t
|
|
106
|
+
MemoryManager* m, const uint8_t* bytes, size_t n, RingBuffer* rb) {
|
|
107
107
|
if (rb->pos_ == 0 && n < rb->tail_size_) {
|
|
108
108
|
/* Special case for the first write: to process the first block, we don't
|
|
109
109
|
need to allocate the whole ring-buffer and we don't need the tail
|
|
@@ -144,12 +144,16 @@ static BROTLI_INLINE void RingBufferWrite(
|
|
|
144
144
|
n - (rb->size_ - masked_pos));
|
|
145
145
|
}
|
|
146
146
|
}
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
rb->pos_ = (rb->pos_ &
|
|
147
|
+
{
|
|
148
|
+
BROTLI_BOOL not_first_lap = (rb->pos_ & (1u << 31)) != 0;
|
|
149
|
+
uint32_t rb_pos_mask = (1u << 31) - 1;
|
|
150
|
+
rb->buffer_[-2] = rb->buffer_[rb->size_ - 2];
|
|
151
|
+
rb->buffer_[-1] = rb->buffer_[rb->size_ - 1];
|
|
152
|
+
rb->pos_ = (rb->pos_ & rb_pos_mask) + (uint32_t)(n & rb_pos_mask);
|
|
153
|
+
if (not_first_lap) {
|
|
154
|
+
/* Wrap, but preserve not-a-first-lap feature. */
|
|
155
|
+
rb->pos_ |= 1u << 31;
|
|
156
|
+
}
|
|
153
157
|
}
|
|
154
158
|
}
|
|
155
159
|
|
|
@@ -7,21 +7,17 @@
|
|
|
7
7
|
#include "./static_dict.h"
|
|
8
8
|
|
|
9
9
|
#include "../common/dictionary.h"
|
|
10
|
+
#include "../common/platform.h"
|
|
11
|
+
#include "../common/transform.h"
|
|
12
|
+
#include "./encoder_dict.h"
|
|
10
13
|
#include "./find_match_length.h"
|
|
11
|
-
#include "./port.h"
|
|
12
|
-
#include "./static_dict_lut.h"
|
|
13
14
|
|
|
14
15
|
#if defined(__cplusplus) || defined(c_plusplus)
|
|
15
16
|
extern "C" {
|
|
16
17
|
#endif
|
|
17
18
|
|
|
18
|
-
static const uint8_t
|
|
19
|
-
|
|
20
|
-
0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
|
|
21
|
-
};
|
|
22
|
-
|
|
23
|
-
static BROTLI_INLINE uint32_t Hash(const uint8_t *data) {
|
|
24
|
-
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
|
|
19
|
+
static BROTLI_INLINE uint32_t Hash(const uint8_t* data) {
|
|
20
|
+
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32;
|
|
25
21
|
/* The higher bits contain more mixture from the multiplication,
|
|
26
22
|
so we take our results from there. */
|
|
27
23
|
return h >> (32 - kDictNumBits);
|
|
@@ -79,32 +75,33 @@ static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
|
|
|
79
75
|
}
|
|
80
76
|
|
|
81
77
|
BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
82
|
-
const
|
|
83
|
-
size_t max_length, uint32_t* matches) {
|
|
78
|
+
const BrotliEncoderDictionary* dictionary, const uint8_t* data,
|
|
79
|
+
size_t min_length, size_t max_length, uint32_t* matches) {
|
|
84
80
|
BROTLI_BOOL has_found_match = BROTLI_FALSE;
|
|
85
81
|
{
|
|
86
|
-
size_t offset =
|
|
82
|
+
size_t offset = dictionary->buckets[Hash(data)];
|
|
87
83
|
BROTLI_BOOL end = !offset;
|
|
88
84
|
while (!end) {
|
|
89
|
-
DictWord w =
|
|
85
|
+
DictWord w = dictionary->dict_words[offset++];
|
|
90
86
|
const size_t l = w.len & 0x1F;
|
|
91
|
-
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
|
87
|
+
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
|
92
88
|
const size_t id = w.idx;
|
|
93
89
|
end = !!(w.len & 0x80);
|
|
94
90
|
w.len = (uint8_t)l;
|
|
95
91
|
if (w.transform == 0) {
|
|
96
92
|
const size_t matchlen =
|
|
97
|
-
DictMatchLength(dictionary, data, id, l, max_length);
|
|
93
|
+
DictMatchLength(dictionary->words, data, id, l, max_length);
|
|
98
94
|
const uint8_t* s;
|
|
99
95
|
size_t minlen;
|
|
100
96
|
size_t maxlen;
|
|
101
97
|
size_t len;
|
|
102
|
-
/* Transform "" +
|
|
98
|
+
/* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
|
|
103
99
|
if (matchlen == l) {
|
|
104
100
|
AddMatch(id, l, l, matches);
|
|
105
101
|
has_found_match = BROTLI_TRUE;
|
|
106
102
|
}
|
|
107
|
-
/* Transforms "" +
|
|
103
|
+
/* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
|
|
104
|
+
"" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
|
|
108
105
|
if (matchlen >= l - 1) {
|
|
109
106
|
AddMatch(id + 12 * n, l - 1, l, matches);
|
|
110
107
|
if (l + 2 < max_length &&
|
|
@@ -114,19 +111,22 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
|
114
111
|
}
|
|
115
112
|
has_found_match = BROTLI_TRUE;
|
|
116
113
|
}
|
|
117
|
-
/* Transform "" +
|
|
114
|
+
/* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
|
|
118
115
|
minlen = min_length;
|
|
119
116
|
if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
|
|
120
117
|
maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
|
|
121
118
|
for (len = minlen; len <= maxlen; ++len) {
|
|
122
|
-
|
|
119
|
+
size_t cut = l - len;
|
|
120
|
+
size_t transform_id = (cut << 2) +
|
|
121
|
+
(size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
|
|
122
|
+
AddMatch(id + transform_id * n, len, l, matches);
|
|
123
123
|
has_found_match = BROTLI_TRUE;
|
|
124
124
|
}
|
|
125
125
|
if (matchlen < l || l + 6 >= max_length) {
|
|
126
126
|
continue;
|
|
127
127
|
}
|
|
128
128
|
s = &data[l];
|
|
129
|
-
/* Transforms "" +
|
|
129
|
+
/* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
|
|
130
130
|
if (s[0] == ' ') {
|
|
131
131
|
AddMatch(id + n, l + 1, l, matches);
|
|
132
132
|
if (s[1] == 'a') {
|
|
@@ -273,12 +273,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
|
273
273
|
}
|
|
274
274
|
}
|
|
275
275
|
} else {
|
|
276
|
-
/* Set is_all_caps=0 for
|
|
277
|
-
is_all_caps=1 otherwise (
|
|
276
|
+
/* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
|
|
277
|
+
is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
|
|
278
|
+
transform. */
|
|
278
279
|
const BROTLI_BOOL is_all_caps =
|
|
279
|
-
TO_BROTLI_BOOL(w.transform !=
|
|
280
|
+
TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
|
|
280
281
|
const uint8_t* s;
|
|
281
|
-
if (!IsMatch(dictionary, w, data, max_length)) {
|
|
282
|
+
if (!IsMatch(dictionary->words, w, data, max_length)) {
|
|
282
283
|
continue;
|
|
283
284
|
}
|
|
284
285
|
/* Transform "" + kUppercase{First,All} + "" */
|
|
@@ -323,27 +324,29 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
|
323
324
|
/* Transforms with prefixes " " and "." */
|
|
324
325
|
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
|
|
325
326
|
BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
|
|
326
|
-
size_t offset =
|
|
327
|
+
size_t offset = dictionary->buckets[Hash(&data[1])];
|
|
327
328
|
BROTLI_BOOL end = !offset;
|
|
328
329
|
while (!end) {
|
|
329
|
-
DictWord w =
|
|
330
|
+
DictWord w = dictionary->dict_words[offset++];
|
|
330
331
|
const size_t l = w.len & 0x1F;
|
|
331
|
-
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
|
332
|
+
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
|
332
333
|
const size_t id = w.idx;
|
|
333
334
|
end = !!(w.len & 0x80);
|
|
334
335
|
w.len = (uint8_t)l;
|
|
335
336
|
if (w.transform == 0) {
|
|
336
337
|
const uint8_t* s;
|
|
337
|
-
if (!IsMatch(dictionary, w, &data[1], max_length - 1)) {
|
|
338
|
+
if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
|
|
338
339
|
continue;
|
|
339
340
|
}
|
|
340
|
-
/* Transforms " " +
|
|
341
|
+
/* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
|
|
342
|
+
"." + BROTLI_TRANSFORM_IDENTITY + "" */
|
|
341
343
|
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
|
|
342
344
|
has_found_match = BROTLI_TRUE;
|
|
343
345
|
if (l + 2 >= max_length) {
|
|
344
346
|
continue;
|
|
345
347
|
}
|
|
346
|
-
/* Transforms " " +
|
|
348
|
+
/* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
|
|
349
|
+
"." + BROTLI_TRANSFORM_IDENTITY + <suffix>
|
|
347
350
|
*/
|
|
348
351
|
s = &data[l + 1];
|
|
349
352
|
if (s[0] == ' ') {
|
|
@@ -370,12 +373,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
|
370
373
|
}
|
|
371
374
|
}
|
|
372
375
|
} else if (is_space) {
|
|
373
|
-
/* Set is_all_caps=0 for
|
|
374
|
-
is_all_caps=1 otherwise (
|
|
376
|
+
/* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
|
|
377
|
+
is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
|
|
378
|
+
transform. */
|
|
375
379
|
const BROTLI_BOOL is_all_caps =
|
|
376
|
-
TO_BROTLI_BOOL(w.transform !=
|
|
380
|
+
TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
|
|
377
381
|
const uint8_t* s;
|
|
378
|
-
if (!IsMatch(dictionary, w, &data[1], max_length - 1)) {
|
|
382
|
+
if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
|
|
379
383
|
continue;
|
|
380
384
|
}
|
|
381
385
|
/* Transforms " " + kUppercase{First,All} + "" */
|
|
@@ -411,22 +415,22 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
|
411
415
|
}
|
|
412
416
|
}
|
|
413
417
|
if (max_length >= 6) {
|
|
414
|
-
/* Transforms with prefixes "e ", "s ", ", " and "\
|
|
418
|
+
/* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
|
|
415
419
|
if ((data[1] == ' ' &&
|
|
416
420
|
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
|
|
417
|
-
(data[0] ==
|
|
418
|
-
size_t offset =
|
|
421
|
+
(data[0] == 0xC2 && data[1] == 0xA0)) {
|
|
422
|
+
size_t offset = dictionary->buckets[Hash(&data[2])];
|
|
419
423
|
BROTLI_BOOL end = !offset;
|
|
420
424
|
while (!end) {
|
|
421
|
-
DictWord w =
|
|
425
|
+
DictWord w = dictionary->dict_words[offset++];
|
|
422
426
|
const size_t l = w.len & 0x1F;
|
|
423
|
-
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
|
427
|
+
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
|
424
428
|
const size_t id = w.idx;
|
|
425
429
|
end = !!(w.len & 0x80);
|
|
426
430
|
w.len = (uint8_t)l;
|
|
427
431
|
if (w.transform == 0 &&
|
|
428
|
-
IsMatch(dictionary, w, &data[2], max_length - 2)) {
|
|
429
|
-
if (data[0] ==
|
|
432
|
+
IsMatch(dictionary->words, w, &data[2], max_length - 2)) {
|
|
433
|
+
if (data[0] == 0xC2) {
|
|
430
434
|
AddMatch(id + 102 * n, l + 2, l, matches);
|
|
431
435
|
has_found_match = BROTLI_TRUE;
|
|
432
436
|
} else if (l + 2 < max_length && data[l + 2] == ' ') {
|
|
@@ -444,17 +448,17 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
|
|
444
448
|
data[3] == 'e' && data[4] == ' ') ||
|
|
445
449
|
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
|
|
446
450
|
data[3] == 'm' && data[4] == '/')) {
|
|
447
|
-
size_t offset =
|
|
451
|
+
size_t offset = dictionary->buckets[Hash(&data[5])];
|
|
448
452
|
BROTLI_BOOL end = !offset;
|
|
449
453
|
while (!end) {
|
|
450
|
-
DictWord w =
|
|
454
|
+
DictWord w = dictionary->dict_words[offset++];
|
|
451
455
|
const size_t l = w.len & 0x1F;
|
|
452
|
-
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
|
456
|
+
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
|
453
457
|
const size_t id = w.idx;
|
|
454
458
|
end = !!(w.len & 0x80);
|
|
455
459
|
w.len = (uint8_t)l;
|
|
456
460
|
if (w.transform == 0 &&
|
|
457
|
-
IsMatch(dictionary, w, &data[5], max_length - 5)) {
|
|
461
|
+
IsMatch(dictionary->words, w, &data[5], max_length - 5)) {
|
|
458
462
|
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
|
|
459
463
|
has_found_match = BROTLI_TRUE;
|
|
460
464
|
if (l + 5 < max_length) {
|