isomorfeus-ferret 0.12.6 → 0.12.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -4
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
- data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
- data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
- data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
- data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
- data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
- data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
- data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
- data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +88 -3
@@ -0,0 +1,105 @@
|
|
1
|
+
/* Copyright 2015 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Algorithms for distributing the literals and commands of a metablock between
|
8
|
+
block types and contexts. */
|
9
|
+
|
10
|
+
#ifndef BROTLI_ENC_METABLOCK_H_
|
11
|
+
#define BROTLI_ENC_METABLOCK_H_
|
12
|
+
|
13
|
+
#include "brotli_common_context.h"
|
14
|
+
#include "brotli_common_platform.h"
|
15
|
+
#include "brotli_types.h"
|
16
|
+
#include "brotli_enc_block_splitter.h"
|
17
|
+
#include "brotli_enc_command.h"
|
18
|
+
#include "brotli_enc_histogram.h"
|
19
|
+
#include "brotli_enc_memory.h"
|
20
|
+
#include "brotli_enc_quality.h"
|
21
|
+
|
22
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
23
|
+
extern "C" {
|
24
|
+
#endif
|
25
|
+
|
26
|
+
typedef struct MetaBlockSplit {
|
27
|
+
BlockSplit literal_split;
|
28
|
+
BlockSplit command_split;
|
29
|
+
BlockSplit distance_split;
|
30
|
+
uint32_t* literal_context_map;
|
31
|
+
size_t literal_context_map_size;
|
32
|
+
uint32_t* distance_context_map;
|
33
|
+
size_t distance_context_map_size;
|
34
|
+
HistogramLiteral* literal_histograms;
|
35
|
+
size_t literal_histograms_size;
|
36
|
+
HistogramCommand* command_histograms;
|
37
|
+
size_t command_histograms_size;
|
38
|
+
HistogramDistance* distance_histograms;
|
39
|
+
size_t distance_histograms_size;
|
40
|
+
} MetaBlockSplit;
|
41
|
+
|
42
|
+
/* Puts |mb| into its empty state: every context map and histogram pointer is
   set to 0 with a size of 0, and the three block splits are initialized. */
static BROTLI_INLINE void InitMetaBlockSplit(MetaBlockSplit* mb) {
  /* No context maps yet. */
  mb->literal_context_map = 0;
  mb->literal_context_map_size = 0;
  mb->distance_context_map = 0;
  mb->distance_context_map_size = 0;

  /* No histograms yet. */
  mb->literal_histograms = 0;
  mb->literal_histograms_size = 0;
  mb->command_histograms = 0;
  mb->command_histograms_size = 0;
  mb->distance_histograms = 0;
  mb->distance_histograms_size = 0;

  /* The nested block splits are separate sub-objects. */
  BrotliInitBlockSplit(&mb->literal_split);
  BrotliInitBlockSplit(&mb->command_split);
  BrotliInitBlockSplit(&mb->distance_split);
}
|
57
|
+
|
58
|
+
static BROTLI_INLINE void DestroyMetaBlockSplit(
|
59
|
+
MemoryManager* m, MetaBlockSplit* mb) {
|
60
|
+
BrotliDestroyBlockSplit(m, &mb->literal_split);
|
61
|
+
BrotliDestroyBlockSplit(m, &mb->command_split);
|
62
|
+
BrotliDestroyBlockSplit(m, &mb->distance_split);
|
63
|
+
BROTLI_FREE(m, mb->literal_context_map);
|
64
|
+
BROTLI_FREE(m, mb->distance_context_map);
|
65
|
+
BROTLI_FREE(m, mb->literal_histograms);
|
66
|
+
BROTLI_FREE(m, mb->command_histograms);
|
67
|
+
BROTLI_FREE(m, mb->distance_histograms);
|
68
|
+
}
|
69
|
+
|
70
|
+
/* Uses the slow shortest-path block splitter and does context clustering.
|
71
|
+
The distance parameters are dynamically selected based on the commands
|
72
|
+
which get recomputed under the new distance parameters. The new distance
|
73
|
+
parameters are stored into *params. */
|
74
|
+
BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
|
75
|
+
const uint8_t* ringbuffer,
|
76
|
+
const size_t pos,
|
77
|
+
const size_t mask,
|
78
|
+
BrotliEncoderParams* params,
|
79
|
+
uint8_t prev_byte,
|
80
|
+
uint8_t prev_byte2,
|
81
|
+
Command* cmds,
|
82
|
+
size_t num_commands,
|
83
|
+
ContextType literal_context_mode,
|
84
|
+
MetaBlockSplit* mb);
|
85
|
+
|
86
|
+
/* Uses a fast greedy block splitter that tries to merge current block with the
|
87
|
+
last or the second last block and uses a static context clustering which
|
88
|
+
is the same for all block types. */
|
89
|
+
BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(
|
90
|
+
MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
|
91
|
+
uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
|
92
|
+
size_t num_contexts, const uint32_t* static_context_map,
|
93
|
+
const Command* commands, size_t n_commands, MetaBlockSplit* mb);
|
94
|
+
|
95
|
+
BROTLI_INTERNAL void BrotliOptimizeHistograms(uint32_t num_distance_codes,
|
96
|
+
MetaBlockSplit* mb);
|
97
|
+
|
98
|
+
BROTLI_INTERNAL void BrotliInitDistanceParams(BrotliEncoderParams* params,
|
99
|
+
uint32_t npostfix, uint32_t ndirect);
|
100
|
+
|
101
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
102
|
+
} /* extern "C" */
|
103
|
+
#endif
|
104
|
+
|
105
|
+
#endif /* BROTLI_ENC_METABLOCK_H_ */
|
@@ -0,0 +1,183 @@
|
|
1
|
+
/* NOLINT(build/header_guard) */
|
2
|
+
/* Copyright 2015 Google Inc. All Rights Reserved.
|
3
|
+
|
4
|
+
Distributed under MIT license.
|
5
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
6
|
+
*/
|
7
|
+
|
8
|
+
/* template parameters: FN */
|
9
|
+
|
10
|
+
#define HistogramType FN(Histogram)
|
11
|
+
|
12
|
+
/* Greedy block splitter for one block category (literal, command or distance).
|
13
|
+
*/
|
14
|
+
typedef struct FN(BlockSplitter) {
|
15
|
+
/* Alphabet size of particular block category. */
|
16
|
+
size_t alphabet_size_;
|
17
|
+
/* We collect at least this many symbols for each block. */
|
18
|
+
size_t min_block_size_;
|
19
|
+
/* We merge histograms A and B if
|
20
|
+
entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
|
21
|
+
where A is the current histogram and B is the histogram of the last or the
|
22
|
+
second last block type. */
|
23
|
+
double split_threshold_;
|
24
|
+
|
25
|
+
size_t num_blocks_;
|
26
|
+
BlockSplit* split_; /* not owned */
|
27
|
+
HistogramType* histograms_; /* not owned */
|
28
|
+
size_t* histograms_size_; /* not owned */
|
29
|
+
|
30
|
+
/* The number of symbols that we want to collect before deciding on whether
|
31
|
+
or not to merge the block with a previous one or emit a new block. */
|
32
|
+
size_t target_block_size_;
|
33
|
+
/* The number of symbols in the current histogram. */
|
34
|
+
size_t block_size_;
|
35
|
+
/* Offset of the current histogram. */
|
36
|
+
size_t curr_histogram_ix_;
|
37
|
+
/* Offset of the histograms of the previous two block types. */
|
38
|
+
size_t last_histogram_ix_[2];
|
39
|
+
/* Entropy of the previous two block types. */
|
40
|
+
double last_entropy_[2];
|
41
|
+
/* The number of times we merged the current block with the last one. */
|
42
|
+
size_t merge_last_count_;
|
43
|
+
} FN(BlockSplitter);
|
44
|
+
|
45
|
+
static void FN(InitBlockSplitter)(
|
46
|
+
MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size,
|
47
|
+
size_t min_block_size, double split_threshold, size_t num_symbols,
|
48
|
+
BlockSplit* split, HistogramType** histograms, size_t* histograms_size) {
|
49
|
+
size_t max_num_blocks = num_symbols / min_block_size + 1;
|
50
|
+
/* We have to allocate one more histogram than the maximum number of block
|
51
|
+
types for the current histogram when the meta-block is too big. */
|
52
|
+
size_t max_num_types =
|
53
|
+
BROTLI_MIN(size_t, max_num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1);
|
54
|
+
self->alphabet_size_ = alphabet_size;
|
55
|
+
self->min_block_size_ = min_block_size;
|
56
|
+
self->split_threshold_ = split_threshold;
|
57
|
+
self->num_blocks_ = 0;
|
58
|
+
self->split_ = split;
|
59
|
+
self->histograms_size_ = histograms_size;
|
60
|
+
self->target_block_size_ = min_block_size;
|
61
|
+
self->block_size_ = 0;
|
62
|
+
self->curr_histogram_ix_ = 0;
|
63
|
+
self->merge_last_count_ = 0;
|
64
|
+
BROTLI_ENSURE_CAPACITY(m, uint8_t,
|
65
|
+
split->types, split->types_alloc_size, max_num_blocks);
|
66
|
+
BROTLI_ENSURE_CAPACITY(m, uint32_t,
|
67
|
+
split->lengths, split->lengths_alloc_size, max_num_blocks);
|
68
|
+
if (BROTLI_IS_OOM(m)) return;
|
69
|
+
self->split_->num_blocks = max_num_blocks;
|
70
|
+
BROTLI_DCHECK(*histograms == 0);
|
71
|
+
*histograms_size = max_num_types;
|
72
|
+
*histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size);
|
73
|
+
self->histograms_ = *histograms;
|
74
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(*histograms)) return;
|
75
|
+
/* Clear only current histogram. */
|
76
|
+
FN(HistogramClear)(&self->histograms_[0]);
|
77
|
+
self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
|
78
|
+
}
|
79
|
+
|
80
|
+
/* Does either of three things:
|
81
|
+
(1) emits the current block with a new block type;
|
82
|
+
(2) emits the current block with the type of the second last block;
|
83
|
+
(3) merges the current block with the last block. */
|
84
|
+
static void FN(BlockSplitterFinishBlock)(
|
85
|
+
FN(BlockSplitter)* self, BROTLI_BOOL is_final) {
|
86
|
+
BlockSplit* split = self->split_;
|
87
|
+
double* last_entropy = self->last_entropy_;
|
88
|
+
HistogramType* histograms = self->histograms_;
|
89
|
+
self->block_size_ =
|
90
|
+
BROTLI_MAX(size_t, self->block_size_, self->min_block_size_);
|
91
|
+
if (self->num_blocks_ == 0) {
|
92
|
+
/* Create first block. */
|
93
|
+
split->lengths[0] = (uint32_t)self->block_size_;
|
94
|
+
split->types[0] = 0;
|
95
|
+
last_entropy[0] =
|
96
|
+
BitsEntropy(histograms[0].data_, self->alphabet_size_);
|
97
|
+
last_entropy[1] = last_entropy[0];
|
98
|
+
++self->num_blocks_;
|
99
|
+
++split->num_types;
|
100
|
+
++self->curr_histogram_ix_;
|
101
|
+
if (self->curr_histogram_ix_ < *self->histograms_size_)
|
102
|
+
FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
|
103
|
+
self->block_size_ = 0;
|
104
|
+
} else if (self->block_size_ > 0) {
|
105
|
+
double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_,
|
106
|
+
self->alphabet_size_);
|
107
|
+
HistogramType combined_histo[2];
|
108
|
+
double combined_entropy[2];
|
109
|
+
double diff[2];
|
110
|
+
size_t j;
|
111
|
+
for (j = 0; j < 2; ++j) {
|
112
|
+
size_t last_histogram_ix = self->last_histogram_ix_[j];
|
113
|
+
combined_histo[j] = histograms[self->curr_histogram_ix_];
|
114
|
+
FN(HistogramAddHistogram)(&combined_histo[j],
|
115
|
+
&histograms[last_histogram_ix]);
|
116
|
+
combined_entropy[j] = BitsEntropy(
|
117
|
+
&combined_histo[j].data_[0], self->alphabet_size_);
|
118
|
+
diff[j] = combined_entropy[j] - entropy - last_entropy[j];
|
119
|
+
}
|
120
|
+
|
121
|
+
if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES &&
|
122
|
+
diff[0] > self->split_threshold_ &&
|
123
|
+
diff[1] > self->split_threshold_) {
|
124
|
+
/* Create new block. */
|
125
|
+
split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
|
126
|
+
split->types[self->num_blocks_] = (uint8_t)split->num_types;
|
127
|
+
self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
|
128
|
+
self->last_histogram_ix_[0] = (uint8_t)split->num_types;
|
129
|
+
last_entropy[1] = last_entropy[0];
|
130
|
+
last_entropy[0] = entropy;
|
131
|
+
++self->num_blocks_;
|
132
|
+
++split->num_types;
|
133
|
+
++self->curr_histogram_ix_;
|
134
|
+
if (self->curr_histogram_ix_ < *self->histograms_size_)
|
135
|
+
FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
|
136
|
+
self->block_size_ = 0;
|
137
|
+
self->merge_last_count_ = 0;
|
138
|
+
self->target_block_size_ = self->min_block_size_;
|
139
|
+
} else if (diff[1] < diff[0] - 20.0) {
|
140
|
+
/* Combine this block with second last block. */
|
141
|
+
split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
|
142
|
+
split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
|
143
|
+
BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
|
144
|
+
histograms[self->last_histogram_ix_[0]] = combined_histo[1];
|
145
|
+
last_entropy[1] = last_entropy[0];
|
146
|
+
last_entropy[0] = combined_entropy[1];
|
147
|
+
++self->num_blocks_;
|
148
|
+
self->block_size_ = 0;
|
149
|
+
FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
|
150
|
+
self->merge_last_count_ = 0;
|
151
|
+
self->target_block_size_ = self->min_block_size_;
|
152
|
+
} else {
|
153
|
+
/* Combine this block with last block. */
|
154
|
+
split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
|
155
|
+
histograms[self->last_histogram_ix_[0]] = combined_histo[0];
|
156
|
+
last_entropy[0] = combined_entropy[0];
|
157
|
+
if (split->num_types == 1) {
|
158
|
+
last_entropy[1] = last_entropy[0];
|
159
|
+
}
|
160
|
+
self->block_size_ = 0;
|
161
|
+
FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
|
162
|
+
if (++self->merge_last_count_ > 1) {
|
163
|
+
self->target_block_size_ += self->min_block_size_;
|
164
|
+
}
|
165
|
+
}
|
166
|
+
}
|
167
|
+
if (is_final) {
|
168
|
+
*self->histograms_size_ = split->num_types;
|
169
|
+
split->num_blocks = self->num_blocks_;
|
170
|
+
}
|
171
|
+
}
|
172
|
+
|
173
|
+
/* Adds the next symbol to the current histogram. When the current histogram
|
174
|
+
reaches the target size, decides on merging the block. */
|
175
|
+
static void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) {
|
176
|
+
FN(HistogramAdd)(&self->histograms_[self->curr_histogram_ix_], symbol);
|
177
|
+
++self->block_size_;
|
178
|
+
if (self->block_size_ == self->target_block_size_) {
|
179
|
+
FN(BlockSplitterFinishBlock)(self, /* is_final = */ BROTLI_FALSE);
|
180
|
+
}
|
181
|
+
}
|
182
|
+
|
183
|
+
#undef HistogramType
|
@@ -0,0 +1,46 @@
|
|
1
|
+
/* Copyright 2017 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Parameters for the Brotli encoder with chosen quality levels. */
|
8
|
+
|
9
|
+
#ifndef BROTLI_ENC_PARAMS_H_
|
10
|
+
#define BROTLI_ENC_PARAMS_H_
|
11
|
+
|
12
|
+
#include "brotli_encode.h"
|
13
|
+
#include "brotli_enc_encoder_dict.h"
|
14
|
+
|
15
|
+
/* Hasher selection and tuning values; ChooseHasher fills these in. */
typedef struct BrotliHasherParams {
  /* Numeric identifier of the selected hasher. */
  int type;
  int bucket_bits;
  int block_bits;
  int hash_len;
  int num_last_distances_to_check;
} BrotliHasherParams;
|
22
|
+
|
23
|
+
/* Parameters of the distance code. */
typedef struct BrotliDistanceParams {
  uint32_t distance_postfix_bits;
  uint32_t num_direct_distance_codes;
  uint32_t alphabet_size_max;
  uint32_t alphabet_size_limit;
  size_t max_distance;
} BrotliDistanceParams;
|
30
|
+
|
31
|
+
/* Encoding parameters */
|
32
|
+
typedef struct BrotliEncoderParams {
|
33
|
+
BrotliEncoderMode mode;
|
34
|
+
int quality;
|
35
|
+
int lgwin;
|
36
|
+
int lgblock;
|
37
|
+
size_t stream_offset;
|
38
|
+
size_t size_hint;
|
39
|
+
BROTLI_BOOL disable_literal_context_modeling;
|
40
|
+
BROTLI_BOOL large_window;
|
41
|
+
BrotliHasherParams hasher;
|
42
|
+
BrotliDistanceParams dist;
|
43
|
+
BrotliEncoderDictionary dictionary;
|
44
|
+
} BrotliEncoderParams;
|
45
|
+
|
46
|
+
#endif /* BROTLI_ENC_PARAMS_H_ */
|
@@ -0,0 +1,53 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Functions for encoding of integers into prefix codes, the amount of extra
|
8
|
+
bits, and the actual values of the extra bits. */
|
9
|
+
|
10
|
+
#ifndef BROTLI_ENC_PREFIX_H_
|
11
|
+
#define BROTLI_ENC_PREFIX_H_
|
12
|
+
|
13
|
+
#include "brotli_common_constants.h"
|
14
|
+
#include "brotli_common_platform.h"
|
15
|
+
#include "brotli_types.h"
|
16
|
+
#include "brotli_enc_fast_log.h"
|
17
|
+
|
18
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
19
|
+
extern "C" {
|
20
|
+
#endif
|
21
|
+
|
22
|
+
/* Here distance_code is an intermediate code, i.e. one of the special codes or
|
23
|
+
the actual distance increased by BROTLI_NUM_DISTANCE_SHORT_CODES - 1. */
|
24
|
+
static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
|
25
|
+
size_t num_direct_codes,
|
26
|
+
size_t postfix_bits,
|
27
|
+
uint16_t* code,
|
28
|
+
uint32_t* extra_bits) {
|
29
|
+
if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes) {
|
30
|
+
*code = (uint16_t)distance_code;
|
31
|
+
*extra_bits = 0;
|
32
|
+
return;
|
33
|
+
} else {
|
34
|
+
size_t dist = ((size_t)1 << (postfix_bits + 2u)) +
|
35
|
+
(distance_code - BROTLI_NUM_DISTANCE_SHORT_CODES - num_direct_codes);
|
36
|
+
size_t bucket = Log2FloorNonZero(dist) - 1;
|
37
|
+
size_t postfix_mask = (1u << postfix_bits) - 1;
|
38
|
+
size_t postfix = dist & postfix_mask;
|
39
|
+
size_t prefix = (dist >> bucket) & 1;
|
40
|
+
size_t offset = (2 + prefix) << bucket;
|
41
|
+
size_t nbits = bucket - postfix_bits;
|
42
|
+
*code = (uint16_t)((nbits << 10) |
|
43
|
+
(BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes +
|
44
|
+
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
|
45
|
+
*extra_bits = (uint32_t)((dist - offset) >> postfix_bits);
|
46
|
+
}
|
47
|
+
}
|
48
|
+
|
49
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
50
|
+
} /* extern "C" */
|
51
|
+
#endif
|
52
|
+
|
53
|
+
#endif /* BROTLI_ENC_PREFIX_H_ */
|
@@ -0,0 +1,165 @@
|
|
1
|
+
/* Copyright 2016 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Constants and formulas that affect speed-ratio trade-offs and thus define
|
8
|
+
quality levels. */
|
9
|
+
|
10
|
+
#ifndef BROTLI_ENC_QUALITY_H_
|
11
|
+
#define BROTLI_ENC_QUALITY_H_
|
12
|
+
|
13
|
+
#include "brotli_common_platform.h"
|
14
|
+
#include "brotli_encode.h"
|
15
|
+
#include "brotli_enc_params.h"
|
16
|
+
|
17
|
+
#define FAST_ONE_PASS_COMPRESSION_QUALITY 0
|
18
|
+
#define FAST_TWO_PASS_COMPRESSION_QUALITY 1
|
19
|
+
#define ZOPFLIFICATION_QUALITY 10
|
20
|
+
#define HQ_ZOPFLIFICATION_QUALITY 11
|
21
|
+
|
22
|
+
#define MAX_QUALITY_FOR_STATIC_ENTROPY_CODES 2
|
23
|
+
#define MIN_QUALITY_FOR_BLOCK_SPLIT 4
|
24
|
+
#define MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS 4
|
25
|
+
#define MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS 4
|
26
|
+
#define MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH 5
|
27
|
+
#define MIN_QUALITY_FOR_CONTEXT_MODELING 5
|
28
|
+
#define MIN_QUALITY_FOR_HQ_CONTEXT_MODELING 7
|
29
|
+
#define MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING 10
|
30
|
+
|
31
|
+
/* For quality below MIN_QUALITY_FOR_BLOCK_SPLIT there is no block splitting,
|
32
|
+
so we buffer at most this many literals and commands. */
|
33
|
+
#define MAX_NUM_DELAYED_SYMBOLS 0x2FFF
|
34
|
+
|
35
|
+
/* Returns hash-table size for quality levels 0 and 1. */
|
36
|
+
static BROTLI_INLINE size_t MaxHashTableSize(int quality) {
|
37
|
+
return quality == FAST_ONE_PASS_COMPRESSION_QUALITY ? 1 << 15 : 1 << 17;
|
38
|
+
}
|
39
|
+
|
40
|
+
/* The maximum length for which the zopflification uses distinct distances. */
|
41
|
+
#define MAX_ZOPFLI_LEN_QUALITY_10 150
|
42
|
+
#define MAX_ZOPFLI_LEN_QUALITY_11 325
|
43
|
+
|
44
|
+
/* Do not thoroughly search when a long copy is found. */
|
45
|
+
#define BROTLI_LONG_COPY_QUICK_STEP 16384
|
46
|
+
|
47
|
+
/* Returns the maximum length for which the zopflification uses distinct
   distances: the quality-10 limit for qualities up to 10, the quality-11
   limit above that. */
static BROTLI_INLINE size_t MaxZopfliLen(const BrotliEncoderParams* params) {
  if (params->quality <= ZOPFLIFICATION_QUALITY) {
    return MAX_ZOPFLI_LEN_QUALITY_10;
  }
  return MAX_ZOPFLI_LEN_QUALITY_11;
}
|
52
|
+
|
53
|
+
/* Number of best candidates to evaluate to expand Zopfli chain. */
|
54
|
+
static BROTLI_INLINE size_t MaxZopfliCandidates(
|
55
|
+
const BrotliEncoderParams* params) {
|
56
|
+
return params->quality <= 10 ? 1 : 5;
|
57
|
+
}
|
58
|
+
|
59
|
+
/* Forces |params| into the supported ranges, in place:
     - quality is clamped to [BROTLI_MIN_QUALITY, BROTLI_MAX_QUALITY];
     - large_window is switched off for qualities that use static entropy
       codes;
     - lgwin is clamped to [BROTLI_MIN_WINDOW_BITS, max], where the maximum
       depends on whether large_window is (still) enabled. */
static BROTLI_INLINE void SanitizeParams(BrotliEncoderParams* params) {
  int lgwin_cap;

  if (params->quality < BROTLI_MIN_QUALITY) {
    params->quality = BROTLI_MIN_QUALITY;
  }
  if (params->quality > BROTLI_MAX_QUALITY) {
    params->quality = BROTLI_MAX_QUALITY;
  }

  if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
    params->large_window = BROTLI_FALSE;
  }

  /* Must be chosen after large_window has been settled above. */
  if (params->large_window) {
    lgwin_cap = BROTLI_LARGE_MAX_WINDOW_BITS;
  } else {
    lgwin_cap = BROTLI_MAX_WINDOW_BITS;
  }

  if (params->lgwin < BROTLI_MIN_WINDOW_BITS) {
    params->lgwin = BROTLI_MIN_WINDOW_BITS;
  } else if (params->lgwin > lgwin_cap) {
    params->lgwin = lgwin_cap;
  }
}
|
73
|
+
|
74
|
+
/* Returns optimized lg_block value. */
|
75
|
+
static BROTLI_INLINE int ComputeLgBlock(const BrotliEncoderParams* params) {
|
76
|
+
int lgblock = params->lgblock;
|
77
|
+
if (params->quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
|
78
|
+
params->quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
|
79
|
+
lgblock = params->lgwin;
|
80
|
+
} else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
|
81
|
+
lgblock = 14;
|
82
|
+
} else if (lgblock == 0) {
|
83
|
+
lgblock = 16;
|
84
|
+
if (params->quality >= 9 && params->lgwin > lgblock) {
|
85
|
+
lgblock = BROTLI_MIN(int, 18, params->lgwin);
|
86
|
+
}
|
87
|
+
} else {
|
88
|
+
lgblock = BROTLI_MIN(int, BROTLI_MAX_INPUT_BLOCK_BITS,
|
89
|
+
BROTLI_MAX(int, BROTLI_MIN_INPUT_BLOCK_BITS, lgblock));
|
90
|
+
}
|
91
|
+
return lgblock;
|
92
|
+
}
|
93
|
+
|
94
|
+
/* Returns log2 of the size of main ring buffer area.
|
95
|
+
Allocate at least lgwin + 1 bits for the ring buffer so that the newly
|
96
|
+
added block fits there completely and we still get lgwin bits and at least
|
97
|
+
read_block_size_bits + 1 bits because the copy tail length needs to be
|
98
|
+
smaller than ring-buffer size. */
|
99
|
+
static BROTLI_INLINE int ComputeRbBits(const BrotliEncoderParams* params) {
|
100
|
+
return 1 + BROTLI_MAX(int, params->lgwin, params->lgblock);
|
101
|
+
}
|
102
|
+
|
103
|
+
/* Returns the maximum meta-block size: 2 raised to the ring-buffer bit count,
   with the exponent capped at BROTLI_MAX_INPUT_BLOCK_BITS. */
static BROTLI_INLINE size_t MaxMetablockSize(
    const BrotliEncoderParams* params) {
  int rb_bits = ComputeRbBits(params);
  int shift = BROTLI_MIN(int, rb_bits, BROTLI_MAX_INPUT_BLOCK_BITS);
  return (size_t)1 << shift;
}
|
109
|
+
|
110
|
+
/* When searching for backward references and have not seen matches for a long
|
111
|
+
time, we can skip some match lookups. Unsuccessful match lookups are very
|
112
|
+
expensive and this kind of a heuristic speeds up compression quite a lot.
|
113
|
+
At first 8 byte strides are taken and every second byte is put to hasher.
|
114
|
+
After 4x more literals stride by 16 bytes, every put 4-th byte to hasher.
|
115
|
+
Applied only to qualities 2 to 9. */
|
116
|
+
static BROTLI_INLINE size_t LiteralSpreeLengthForSparseSearch(
|
117
|
+
const BrotliEncoderParams* params) {
|
118
|
+
return params->quality < 9 ? 64 : 512;
|
119
|
+
}
|
120
|
+
|
121
|
+
static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params,
|
122
|
+
BrotliHasherParams* hparams) {
|
123
|
+
if (params->quality > 9) {
|
124
|
+
hparams->type = 10;
|
125
|
+
} else if (params->quality == 4 && params->size_hint >= (1 << 20)) {
|
126
|
+
hparams->type = 54;
|
127
|
+
} else if (params->quality < 5) {
|
128
|
+
hparams->type = params->quality;
|
129
|
+
} else if (params->lgwin <= 16) {
|
130
|
+
hparams->type = params->quality < 7 ? 40 : params->quality < 9 ? 41 : 42;
|
131
|
+
} else if (params->size_hint >= (1 << 20) && params->lgwin >= 19) {
|
132
|
+
hparams->type = 6;
|
133
|
+
hparams->block_bits = params->quality - 1;
|
134
|
+
hparams->bucket_bits = 15;
|
135
|
+
hparams->hash_len = 5;
|
136
|
+
hparams->num_last_distances_to_check =
|
137
|
+
params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
|
138
|
+
} else {
|
139
|
+
hparams->type = 5;
|
140
|
+
hparams->block_bits = params->quality - 1;
|
141
|
+
hparams->bucket_bits = params->quality < 7 ? 14 : 15;
|
142
|
+
hparams->num_last_distances_to_check =
|
143
|
+
params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
|
144
|
+
}
|
145
|
+
|
146
|
+
if (params->lgwin > 24) {
|
147
|
+
/* Different hashers for large window brotli: not for qualities <= 2,
|
148
|
+
these are too fast for large window. Not for qualities >= 10: their
|
149
|
+
hasher already works well with large window. So the changes are:
|
150
|
+
H3 --> H35: for quality 3.
|
151
|
+
H54 --> H55: for quality 4 with size hint > 1MB
|
152
|
+
H6 --> H65: for qualities 5, 6, 7, 8, 9. */
|
153
|
+
if (hparams->type == 3) {
|
154
|
+
hparams->type = 35;
|
155
|
+
}
|
156
|
+
if (hparams->type == 54) {
|
157
|
+
hparams->type = 55;
|
158
|
+
}
|
159
|
+
if (hparams->type == 6) {
|
160
|
+
hparams->type = 65;
|
161
|
+
}
|
162
|
+
}
|
163
|
+
}
|
164
|
+
|
165
|
+
#endif /* BROTLI_ENC_QUALITY_H_ */
|