isomorfeus-ferret 0.12.6 → 0.12.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -4
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
- data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
- data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
- data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
- data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
- data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
- data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
- data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
- data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +88 -3
@@ -0,0 +1,33 @@
|
|
1
|
+
/* Copyright 2017 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
#include "brotli_enc_encoder_dict.h"
|
8
|
+
|
9
|
+
#include "brotli_common_dictionary.h"
|
10
|
+
#include "brotli_common_transform.h"
|
11
|
+
#include "brotli_enc_dictionary_hash.h"
|
12
|
+
#include "brotli_enc_hash.h"
|
13
|
+
|
14
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
15
|
+
extern "C" {
|
16
|
+
#endif
|
17
|
+
|
18
|
+
/* Points every field of |dict| at the built-in static dictionary data:
   the word list, the transform count, the cut-off transform constants and
   the precomputed lookup tables. Nothing is allocated or copied. */
void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict) {
  /* Word list and number of transforms. */
  dict->words = BrotliGetDictionary();
  dict->num_transforms = (uint32_t)BrotliGetTransforms()->num_transforms;

  /* Cut-off transform constants. */
  dict->cutoffTransforms = kCutoffTransforms;
  dict->cutoffTransformsCount = kCutoffTransformsCount;

  /* Hash tables over the static dictionary. */
  dict->hash_table_lengths = kStaticDictionaryHashLengths;
  dict->hash_table_words = kStaticDictionaryHashWords;

  /* Bucketed word lookup tables. */
  dict->dict_words = kStaticDictionaryWords;
  dict->buckets = kStaticDictionaryBuckets;
}
|
30
|
+
|
31
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
32
|
+
} /* extern "C" */
|
33
|
+
#endif
|
@@ -0,0 +1,43 @@
|
|
1
|
+
/* Copyright 2017 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
#ifndef BROTLI_ENC_ENCODER_DICT_H_
|
8
|
+
#define BROTLI_ENC_ENCODER_DICT_H_
|
9
|
+
|
10
|
+
#include "brotli_common_dictionary.h"
|
11
|
+
#include "brotli_common_platform.h"
|
12
|
+
#include "brotli_types.h"
|
13
|
+
#include "brotli_enc_static_dict_lut.h"
|
14
|
+
|
15
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
16
|
+
extern "C" {
|
17
|
+
#endif
|
18
|
+
|
19
|
+
/* Dictionary data (words and transforms) for 1 possible context */
|
20
|
+
typedef struct BrotliEncoderDictionary {
|
21
|
+
const BrotliDictionary* words;
|
22
|
+
uint32_t num_transforms;
|
23
|
+
|
24
|
+
/* cut off for fast encoder */
|
25
|
+
uint32_t cutoffTransformsCount;
|
26
|
+
uint64_t cutoffTransforms;
|
27
|
+
|
28
|
+
/* from dictionary_hash.h, for fast encoder */
|
29
|
+
const uint16_t* hash_table_words;
|
30
|
+
const uint8_t* hash_table_lengths;
|
31
|
+
|
32
|
+
/* from static_dict_lut.h, for slow encoder */
|
33
|
+
const uint16_t* buckets;
|
34
|
+
const DictWord* dict_words;
|
35
|
+
} BrotliEncoderDictionary;
|
36
|
+
|
37
|
+
BROTLI_INTERNAL void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict);
|
38
|
+
|
39
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
40
|
+
} /* extern "C" */
|
41
|
+
#endif
|
42
|
+
|
43
|
+
#endif /* BROTLI_ENC_ENCODER_DICT_H_ */
|
@@ -0,0 +1,503 @@
|
|
1
|
+
/* Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Entropy encoding (Huffman) utilities. */
|
8
|
+
|
9
|
+
#include "brotli_enc_entropy_encode.h"
|
10
|
+
|
11
|
+
#include <string.h> /* memset */
|
12
|
+
|
13
|
+
#include "brotli_common_constants.h"
|
14
|
+
#include "brotli_common_platform.h"
|
15
|
+
#include "brotli_types.h"
|
16
|
+
|
17
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
18
|
+
extern "C" {
|
19
|
+
#endif
|
20
|
+
|
21
|
+
/* Gap sequence, largest first, used by the Shell sort in
   SortHuffmanTreeItems. */
const size_t kBrotliShellGaps[6] = {
  132, 57, 23, 10, 4, 1
};
|
22
|
+
|
23
|
+
/* Walks the tree rooted at pool[p0] without recursion and stores, for every
   leaf, its distance from the root into depth[leaf value].

   A node is internal when its index_left_ is non-negative; otherwise it is a
   leaf whose symbol is kept in index_right_or_value_.

   Returns BROTLI_FALSE as soon as a node deeper than |max_depth| would have to
   be visited, BROTLI_TRUE once the whole tree has been traversed. */
BROTLI_BOOL BrotliSetDepth(
    int p0, HuffmanTree* pool, uint8_t* depth, int max_depth) {
  /* pending[d] is the not-yet-visited right child at depth d, or -1. */
  int pending[16];
  int cur_depth = 0;
  int node = p0;
  BROTLI_DCHECK(max_depth <= 15);
  pending[0] = -1;
  for (;;) {
    /* Follow left links down to a leaf, remembering each right sibling. */
    while (pool[node].index_left_ >= 0) {
      ++cur_depth;
      if (cur_depth > max_depth) return BROTLI_FALSE;
      pending[cur_depth] = pool[node].index_right_or_value_;
      node = pool[node].index_left_;
    }
    depth[pool[node].index_right_or_value_] = (uint8_t)cur_depth;

    /* Climb back up to the deepest level that still has a pending subtree. */
    while (cur_depth >= 0 && pending[cur_depth] == -1) --cur_depth;
    if (cur_depth < 0) return BROTLI_TRUE;

    node = pending[cur_depth];
    pending[cur_depth] = -1;
  }
}
|
46
|
+
|
47
|
+
/* Sort the root nodes, least popular first. */
|
48
|
+
static BROTLI_INLINE BROTLI_BOOL SortHuffmanTree(
|
49
|
+
const HuffmanTree* v0, const HuffmanTree* v1) {
|
50
|
+
if (v0->total_count_ != v1->total_count_) {
|
51
|
+
return TO_BROTLI_BOOL(v0->total_count_ < v1->total_count_);
|
52
|
+
}
|
53
|
+
return TO_BROTLI_BOOL(v0->index_right_or_value_ > v1->index_right_or_value_);
|
54
|
+
}
|
55
|
+
|
56
|
+
/* This function will create a Huffman tree.
|
57
|
+
|
58
|
+
The catch here is that the tree cannot be arbitrarily deep.
|
59
|
+
Brotli specifies a maximum depth of 15 bits for "code trees"
|
60
|
+
and 7 bits for "code length code trees."
|
61
|
+
|
62
|
+
count_limit is the value that is to be faked as the minimum value
|
63
|
+
and this minimum value is raised until the tree matches the
|
64
|
+
maximum length requirement.
|
65
|
+
|
66
|
+
This algorithm is not of excellent performance for very long data blocks,
|
67
|
+
especially when population counts are longer than 2**tree_limit, but
|
68
|
+
we are not planning to use this with extremely long blocks.
|
69
|
+
|
70
|
+
See http://en.wikipedia.org/wiki/Huffman_coding */
|
71
|
+
void BrotliCreateHuffmanTree(const uint32_t* data,
|
72
|
+
const size_t length,
|
73
|
+
const int tree_limit,
|
74
|
+
HuffmanTree* tree,
|
75
|
+
uint8_t* depth) {
|
76
|
+
uint32_t count_limit;
|
77
|
+
HuffmanTree sentinel;
|
78
|
+
InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1);
|
79
|
+
/* For block sizes below 64 kB, we never need to do a second iteration
|
80
|
+
of this loop. Probably all of our block sizes will be smaller than
|
81
|
+
that, so this loop is mostly of academic interest. If we actually
|
82
|
+
would need this, we would be better off with the Katajainen algorithm. */
|
83
|
+
for (count_limit = 1; ; count_limit *= 2) {
|
84
|
+
size_t n = 0;
|
85
|
+
size_t i;
|
86
|
+
size_t j;
|
87
|
+
size_t k;
|
88
|
+
for (i = length; i != 0;) {
|
89
|
+
--i;
|
90
|
+
if (data[i]) {
|
91
|
+
const uint32_t count = BROTLI_MAX(uint32_t, data[i], count_limit);
|
92
|
+
InitHuffmanTree(&tree[n++], count, -1, (int16_t)i);
|
93
|
+
}
|
94
|
+
}
|
95
|
+
|
96
|
+
if (n == 1) {
|
97
|
+
depth[tree[0].index_right_or_value_] = 1; /* Only one element. */
|
98
|
+
break;
|
99
|
+
}
|
100
|
+
|
101
|
+
SortHuffmanTreeItems(tree, n, SortHuffmanTree);
|
102
|
+
|
103
|
+
/* The nodes are:
|
104
|
+
[0, n): the sorted leaf nodes that we start with.
|
105
|
+
[n]: we add a sentinel here.
|
106
|
+
[n + 1, 2n): new parent nodes are added here, starting from
|
107
|
+
(n+1). These are naturally in ascending order.
|
108
|
+
[2n]: we add a sentinel at the end as well.
|
109
|
+
There will be (2n+1) elements at the end. */
|
110
|
+
tree[n] = sentinel;
|
111
|
+
tree[n + 1] = sentinel;
|
112
|
+
|
113
|
+
i = 0; /* Points to the next leaf node. */
|
114
|
+
j = n + 1; /* Points to the next non-leaf node. */
|
115
|
+
for (k = n - 1; k != 0; --k) {
|
116
|
+
size_t left, right;
|
117
|
+
if (tree[i].total_count_ <= tree[j].total_count_) {
|
118
|
+
left = i;
|
119
|
+
++i;
|
120
|
+
} else {
|
121
|
+
left = j;
|
122
|
+
++j;
|
123
|
+
}
|
124
|
+
if (tree[i].total_count_ <= tree[j].total_count_) {
|
125
|
+
right = i;
|
126
|
+
++i;
|
127
|
+
} else {
|
128
|
+
right = j;
|
129
|
+
++j;
|
130
|
+
}
|
131
|
+
|
132
|
+
{
|
133
|
+
/* The sentinel node becomes the parent node. */
|
134
|
+
size_t j_end = 2 * n - k;
|
135
|
+
tree[j_end].total_count_ =
|
136
|
+
tree[left].total_count_ + tree[right].total_count_;
|
137
|
+
tree[j_end].index_left_ = (int16_t)left;
|
138
|
+
tree[j_end].index_right_or_value_ = (int16_t)right;
|
139
|
+
|
140
|
+
/* Add back the last sentinel node. */
|
141
|
+
tree[j_end + 1] = sentinel;
|
142
|
+
}
|
143
|
+
}
|
144
|
+
if (BrotliSetDepth((int)(2 * n - 1), &tree[0], depth, tree_limit)) {
|
145
|
+
/* We need to pack the Huffman tree in tree_limit bits. If this was not
|
146
|
+
successful, add fake entities to the lowest values and retry. */
|
147
|
+
break;
|
148
|
+
}
|
149
|
+
}
|
150
|
+
}
|
151
|
+
|
152
|
+
/* Reverses, in place, the bytes of |v| in the half-open range
   [start, end). */
static void Reverse(uint8_t* v, size_t start, size_t end) {
  size_t lo;
  size_t hi;
  /* |hi| is one past the right element of the pair being swapped. */
  for (lo = start, hi = end; lo + 1 < hi; ++lo, --hi) {
    const uint8_t left = v[lo];
    v[lo] = v[hi - 1];
    v[hi - 1] = left;
  }
}
|
162
|
+
|
163
|
+
static void BrotliWriteHuffmanTreeRepetitions(
|
164
|
+
const uint8_t previous_value,
|
165
|
+
const uint8_t value,
|
166
|
+
size_t repetitions,
|
167
|
+
size_t* tree_size,
|
168
|
+
uint8_t* tree,
|
169
|
+
uint8_t* extra_bits_data) {
|
170
|
+
BROTLI_DCHECK(repetitions > 0);
|
171
|
+
if (previous_value != value) {
|
172
|
+
tree[*tree_size] = value;
|
173
|
+
extra_bits_data[*tree_size] = 0;
|
174
|
+
++(*tree_size);
|
175
|
+
--repetitions;
|
176
|
+
}
|
177
|
+
if (repetitions == 7) {
|
178
|
+
tree[*tree_size] = value;
|
179
|
+
extra_bits_data[*tree_size] = 0;
|
180
|
+
++(*tree_size);
|
181
|
+
--repetitions;
|
182
|
+
}
|
183
|
+
if (repetitions < 3) {
|
184
|
+
size_t i;
|
185
|
+
for (i = 0; i < repetitions; ++i) {
|
186
|
+
tree[*tree_size] = value;
|
187
|
+
extra_bits_data[*tree_size] = 0;
|
188
|
+
++(*tree_size);
|
189
|
+
}
|
190
|
+
} else {
|
191
|
+
size_t start = *tree_size;
|
192
|
+
repetitions -= 3;
|
193
|
+
while (BROTLI_TRUE) {
|
194
|
+
tree[*tree_size] = BROTLI_REPEAT_PREVIOUS_CODE_LENGTH;
|
195
|
+
extra_bits_data[*tree_size] = repetitions & 0x3;
|
196
|
+
++(*tree_size);
|
197
|
+
repetitions >>= 2;
|
198
|
+
if (repetitions == 0) {
|
199
|
+
break;
|
200
|
+
}
|
201
|
+
--repetitions;
|
202
|
+
}
|
203
|
+
Reverse(tree, start, *tree_size);
|
204
|
+
Reverse(extra_bits_data, start, *tree_size);
|
205
|
+
}
|
206
|
+
}
|
207
|
+
|
208
|
+
static void BrotliWriteHuffmanTreeRepetitionsZeros(
|
209
|
+
size_t repetitions,
|
210
|
+
size_t* tree_size,
|
211
|
+
uint8_t* tree,
|
212
|
+
uint8_t* extra_bits_data) {
|
213
|
+
if (repetitions == 11) {
|
214
|
+
tree[*tree_size] = 0;
|
215
|
+
extra_bits_data[*tree_size] = 0;
|
216
|
+
++(*tree_size);
|
217
|
+
--repetitions;
|
218
|
+
}
|
219
|
+
if (repetitions < 3) {
|
220
|
+
size_t i;
|
221
|
+
for (i = 0; i < repetitions; ++i) {
|
222
|
+
tree[*tree_size] = 0;
|
223
|
+
extra_bits_data[*tree_size] = 0;
|
224
|
+
++(*tree_size);
|
225
|
+
}
|
226
|
+
} else {
|
227
|
+
size_t start = *tree_size;
|
228
|
+
repetitions -= 3;
|
229
|
+
while (BROTLI_TRUE) {
|
230
|
+
tree[*tree_size] = BROTLI_REPEAT_ZERO_CODE_LENGTH;
|
231
|
+
extra_bits_data[*tree_size] = repetitions & 0x7;
|
232
|
+
++(*tree_size);
|
233
|
+
repetitions >>= 3;
|
234
|
+
if (repetitions == 0) {
|
235
|
+
break;
|
236
|
+
}
|
237
|
+
--repetitions;
|
238
|
+
}
|
239
|
+
Reverse(tree, start, *tree_size);
|
240
|
+
Reverse(extra_bits_data, start, *tree_size);
|
241
|
+
}
|
242
|
+
}
|
243
|
+
|
244
|
+
/* Adjusts the population counts in place so that the code lengths derived
   from them are more likely to contain runs that RLE can encode.

   length:       number of entries in |counts|.
   counts:       the histogram; modified in place.
   good_for_rle: scratch buffer of at least |length| bytes; overwritten once
                 the early-exit checks have passed.

   The histogram is left untouched when it has fewer than 16 non-zero
   entries. With fewer than 28 non-zero entries only isolated zeros may be
   raised to 1. */
void BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
                                       uint8_t* good_for_rle) {
  const size_t streak_limit = 1240;
  size_t used_symbols = 0;
  size_t stride;
  size_t limit;
  size_t sum;
  size_t i;

  /* Let's make the Huffman code more compatible with RLE encoding. */
  for (i = 0; i < length; i++) {
    if (counts[i] != 0) {
      ++used_symbols;
    }
  }
  if (used_symbols < 16) {
    return;
  }

  /* Drop trailing zeros from consideration. */
  while (length != 0 && counts[length - 1] == 0) {
    --length;
  }
  if (length == 0) {
    return;  /* All zeros. */
  }

  /* Now counts[0..length - 1] does not have trailing zeros. */
  {
    size_t nonzeros = 0;
    uint32_t smallest_nonzero = 1 << 30;
    for (i = 0; i < length; ++i) {
      const uint32_t c = counts[i];
      if (c == 0) {
        continue;
      }
      ++nonzeros;
      if (c < smallest_nonzero) {
        smallest_nonzero = c;
      }
    }
    if (nonzeros < 5) {
      /* Small histogram will model it well. */
      return;
    }
    if (smallest_nonzero < 4 && length - nonzeros < 6) {
      /* Only a few zeros, and small counts exist anyway: raise every zero
         that sits between two non-zero neighbours to 1. */
      for (i = 1; i < length - 1; ++i) {
        if (counts[i] == 0 && counts[i - 1] != 0 && counts[i + 1] != 0) {
          counts[i] = 1;
        }
      }
    }
    if (nonzeros < 28) {
      return;
    }
  }

  /* 2) Let's mark all population counts that already can be encoded
     with an RLE code. */
  memset(good_for_rle, 0, length);
  {
    /* Let's not spoil any of the existing good RLE codes.
       Mark any run of 5 or more equal zeros as good_for_rle.
       Mark any run of 7 or more equal non-zeros as good_for_rle. */
    size_t run_start = 0;
    while (run_start < length) {
      const uint32_t symbol = counts[run_start];
      size_t run_end = run_start + 1;
      size_t run_len;
      while (run_end < length && counts[run_end] == symbol) {
        ++run_end;
      }
      run_len = run_end - run_start;
      if ((symbol == 0 && run_len >= 5) || (symbol != 0 && run_len >= 7)) {
        memset(good_for_rle + run_start, 1, run_len);
      }
      run_start = run_end;
    }
  }

  /* 3) Let's replace those population counts that lead to more RLE codes.
     Math here is in 24.8 fixed point representation. */
  stride = 0;
  limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
  sum = 0;
  for (i = 0; i <= length; ++i) {
    /* The last test relies on unsigned wrap-around: it is true when
       256 * counts[i] is at least streak_limit away from limit. */
    if (i == length || good_for_rle[i] ||
        (i != 0 && good_for_rle[i - 1]) ||
        (256 * counts[i] - limit + streak_limit) >= 2 * streak_limit) {
      if (stride >= 4 || (stride >= 3 && sum == 0)) {
        /* The stride must end, collapse what we have, if we have enough. */
        size_t k;
        size_t count;
        if (sum == 0) {
          /* Don't make an all zeros stride to be upgraded to ones. */
          count = 0;
        } else {
          count = (sum + stride / 2) / stride;
          if (count == 0) {
            count = 1;
          }
        }
        /* counts[i] already belongs to the next stride, so stop before i. */
        for (k = i - stride; k < i; ++k) {
          counts[k] = (uint32_t)count;
        }
      }
      stride = 0;
      sum = 0;
      if (i < length - 2) {
        /* All interesting strides have a count of at least 4, */
        /* at least when non-zeros. */
        limit = 256 * (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 420;
      } else if (i < length) {
        limit = 256 * counts[i];
      } else {
        limit = 0;
      }
    }
    ++stride;
    if (i != length) {
      sum += counts[i];
      if (stride >= 4) {
        limit = (256 * sum + stride / 2) / stride;
      }
      if (stride == 4) {
        limit += 120;
      }
    }
  }
}
|
374
|
+
|
375
|
+
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
|
376
|
+
BROTLI_BOOL* use_rle_for_non_zero,
|
377
|
+
BROTLI_BOOL* use_rle_for_zero) {
|
378
|
+
size_t total_reps_zero = 0;
|
379
|
+
size_t total_reps_non_zero = 0;
|
380
|
+
size_t count_reps_zero = 1;
|
381
|
+
size_t count_reps_non_zero = 1;
|
382
|
+
size_t i;
|
383
|
+
for (i = 0; i < length;) {
|
384
|
+
const uint8_t value = depth[i];
|
385
|
+
size_t reps = 1;
|
386
|
+
size_t k;
|
387
|
+
for (k = i + 1; k < length && depth[k] == value; ++k) {
|
388
|
+
++reps;
|
389
|
+
}
|
390
|
+
if (reps >= 3 && value == 0) {
|
391
|
+
total_reps_zero += reps;
|
392
|
+
++count_reps_zero;
|
393
|
+
}
|
394
|
+
if (reps >= 4 && value != 0) {
|
395
|
+
total_reps_non_zero += reps;
|
396
|
+
++count_reps_non_zero;
|
397
|
+
}
|
398
|
+
i += reps;
|
399
|
+
}
|
400
|
+
*use_rle_for_non_zero =
|
401
|
+
TO_BROTLI_BOOL(total_reps_non_zero > count_reps_non_zero * 2);
|
402
|
+
*use_rle_for_zero = TO_BROTLI_BOOL(total_reps_zero > count_reps_zero * 2);
|
403
|
+
}
|
404
|
+
|
405
|
+
void BrotliWriteHuffmanTree(const uint8_t* depth,
|
406
|
+
size_t length,
|
407
|
+
size_t* tree_size,
|
408
|
+
uint8_t* tree,
|
409
|
+
uint8_t* extra_bits_data) {
|
410
|
+
uint8_t previous_value = BROTLI_INITIAL_REPEATED_CODE_LENGTH;
|
411
|
+
size_t i;
|
412
|
+
BROTLI_BOOL use_rle_for_non_zero = BROTLI_FALSE;
|
413
|
+
BROTLI_BOOL use_rle_for_zero = BROTLI_FALSE;
|
414
|
+
|
415
|
+
/* Throw away trailing zeros. */
|
416
|
+
size_t new_length = length;
|
417
|
+
for (i = 0; i < length; ++i) {
|
418
|
+
if (depth[length - i - 1] == 0) {
|
419
|
+
--new_length;
|
420
|
+
} else {
|
421
|
+
break;
|
422
|
+
}
|
423
|
+
}
|
424
|
+
|
425
|
+
/* First gather statistics on if it is a good idea to do RLE. */
|
426
|
+
if (length > 50) {
|
427
|
+
/* Find RLE coding for longer codes.
|
428
|
+
Shorter codes seem not to benefit from RLE. */
|
429
|
+
DecideOverRleUse(depth, new_length,
|
430
|
+
&use_rle_for_non_zero, &use_rle_for_zero);
|
431
|
+
}
|
432
|
+
|
433
|
+
/* Actual RLE coding. */
|
434
|
+
for (i = 0; i < new_length;) {
|
435
|
+
const uint8_t value = depth[i];
|
436
|
+
size_t reps = 1;
|
437
|
+
if ((value != 0 && use_rle_for_non_zero) ||
|
438
|
+
(value == 0 && use_rle_for_zero)) {
|
439
|
+
size_t k;
|
440
|
+
for (k = i + 1; k < new_length && depth[k] == value; ++k) {
|
441
|
+
++reps;
|
442
|
+
}
|
443
|
+
}
|
444
|
+
if (value == 0) {
|
445
|
+
BrotliWriteHuffmanTreeRepetitionsZeros(
|
446
|
+
reps, tree_size, tree, extra_bits_data);
|
447
|
+
} else {
|
448
|
+
BrotliWriteHuffmanTreeRepetitions(previous_value,
|
449
|
+
value, reps, tree_size,
|
450
|
+
tree, extra_bits_data);
|
451
|
+
previous_value = value;
|
452
|
+
}
|
453
|
+
i += reps;
|
454
|
+
}
|
455
|
+
}
|
456
|
+
|
457
|
+
/* Returns the low |num_bits| bits of |bits| in reversed bit order.
   Works a nibble at a time through a lookup table, then shifts out the
   surplus bits when |num_bits| is not a multiple of 4. */
static uint16_t BrotliReverseBits(size_t num_bits, uint16_t bits) {
  static const size_t kNibbleReversed[16] = {
    0x00, 0x08, 0x04, 0x0C, 0x02, 0x0A, 0x06, 0x0E,
    0x01, 0x09, 0x05, 0x0D, 0x03, 0x0B, 0x07, 0x0F
  };
  size_t reversed = kNibbleReversed[bits & 0x0F];
  size_t consumed = 4;
  while (consumed < num_bits) {
    bits = (uint16_t)(bits >> 4);
    reversed = (reversed << 4) | kNibbleReversed[bits & 0x0F];
    consumed += 4;
  }
  /* Whole nibbles were reversed; drop the (-num_bits mod 4) extra low bits. */
  reversed >>= ((0 - num_bits) & 0x03);
  return (uint16_t)reversed;
}
|
472
|
+
|
473
|
+
/* 0..15 are values for bits */
#define MAX_HUFFMAN_BITS 16

/* Assigns a code to every symbol from its bit depth.

   depth: bit depth per symbol; 0 means that the symbol does not exist,
          all other depths are in [1..15].
   len:   number of symbols.
   bits:  output; bits[i] receives the bit-reversed code of symbol i.
          Entries of symbols with depth 0 are not written.

   Codes of the same depth are handed out in increasing symbol order. */
void BrotliConvertBitDepthsToSymbols(const uint8_t* depth,
                                     size_t len,
                                     uint16_t* bits) {
  uint16_t depth_histogram[MAX_HUFFMAN_BITS] = { 0 };
  uint16_t next_code[MAX_HUFFMAN_BITS];
  size_t symbol;
  size_t bit_len;
  int code = 0;

  /* Count how many symbols use each depth; depth 0 takes no code space. */
  for (symbol = 0; symbol < len; ++symbol) {
    ++depth_histogram[depth[symbol]];
  }
  depth_histogram[0] = 0;

  /* First code of every depth. */
  next_code[0] = 0;
  for (bit_len = 1; bit_len < MAX_HUFFMAN_BITS; ++bit_len) {
    code = (code + depth_histogram[bit_len - 1]) << 1;
    next_code[bit_len] = (uint16_t)code;
  }

  /* Hand out consecutive codes within each depth. */
  for (symbol = 0; symbol < len; ++symbol) {
    const uint8_t d = depth[symbol];
    if (d != 0) {
      bits[symbol] = BrotliReverseBits(d, next_code[d]);
      ++next_code[d];
    }
  }
}
|
500
|
+
|
501
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
502
|
+
} /* extern "C" */
|
503
|
+
#endif
|
@@ -0,0 +1,122 @@
|
|
1
|
+
/* Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Entropy encoding (Huffman) utilities. */
|
8
|
+
|
9
|
+
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
|
10
|
+
#define BROTLI_ENC_ENTROPY_ENCODE_H_
|
11
|
+
|
12
|
+
#include "brotli_common_platform.h"
|
13
|
+
#include "brotli_types.h"
|
14
|
+
|
15
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
16
|
+
extern "C" {
|
17
|
+
#endif
|
18
|
+
|
19
|
+
/* A node of a Huffman tree. */
|
20
|
+
typedef struct HuffmanTree {
|
21
|
+
uint32_t total_count_;
|
22
|
+
int16_t index_left_;
|
23
|
+
int16_t index_right_or_value_;
|
24
|
+
} HuffmanTree;
|
25
|
+
|
26
|
+
static BROTLI_INLINE void InitHuffmanTree(HuffmanTree* self, uint32_t count,
|
27
|
+
int16_t left, int16_t right) {
|
28
|
+
self->total_count_ = count;
|
29
|
+
self->index_left_ = left;
|
30
|
+
self->index_right_or_value_ = right;
|
31
|
+
}
|
32
|
+
|
33
|
+
/* Returns 1 if assignment of depths succeeded, otherwise 0. */
|
34
|
+
BROTLI_INTERNAL BROTLI_BOOL BrotliSetDepth(
|
35
|
+
int p, HuffmanTree* pool, uint8_t* depth, int max_depth);
|
36
|
+
|
37
|
+
/* This function will create a Huffman tree.
|
38
|
+
|
39
|
+
The (data,length) contains the population counts.
|
40
|
+
The tree_limit is the maximum bit depth of the Huffman codes.
|
41
|
+
|
42
|
+
The depth contains the tree, i.e., how many bits are used for
|
43
|
+
the symbol.
|
44
|
+
|
45
|
+
The actual Huffman tree is constructed in the tree[] array, which has to
|
46
|
+
be at least 2 * length + 1 long.
|
47
|
+
|
48
|
+
See http://en.wikipedia.org/wiki/Huffman_coding */
|
49
|
+
BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t* data,
|
50
|
+
const size_t length,
|
51
|
+
const int tree_limit,
|
52
|
+
HuffmanTree* tree,
|
53
|
+
uint8_t* depth);
|
54
|
+
|
55
|
+
/* Change the population counts in a way that the consequent
|
56
|
+
Huffman tree compression, especially its RLE-part will be more
|
57
|
+
likely to compress this data more efficiently.
|
58
|
+
|
59
|
+
length contains the size of the histogram.
|
60
|
+
counts contains the population counts.
|
61
|
+
good_for_rle is a buffer of at least length size */
|
62
|
+
BROTLI_INTERNAL void BrotliOptimizeHuffmanCountsForRle(
|
63
|
+
size_t length, uint32_t* counts, uint8_t* good_for_rle);
|
64
|
+
|
65
|
+
/* Write a Huffman tree from bit depths into the bit-stream representation
|
66
|
+
of a Huffman tree. The generated Huffman tree is to be compressed once
|
67
|
+
more using a Huffman tree */
|
68
|
+
BROTLI_INTERNAL void BrotliWriteHuffmanTree(const uint8_t* depth,
|
69
|
+
size_t num,
|
70
|
+
size_t* tree_size,
|
71
|
+
uint8_t* tree,
|
72
|
+
uint8_t* extra_bits_data);
|
73
|
+
|
74
|
+
/* Get the actual bit values for a tree of bit depths. */
|
75
|
+
BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t* depth,
|
76
|
+
size_t len,
|
77
|
+
uint16_t* bits);
|
78
|
+
|
79
|
+
BROTLI_INTERNAL extern const size_t kBrotliShellGaps[6];
|
80
|
+
/* Input size optimized Shell sort. */
|
81
|
+
typedef BROTLI_BOOL (*HuffmanTreeComparator)(
|
82
|
+
const HuffmanTree*, const HuffmanTree*);
|
83
|
+
/* Sorts items[0..n) in place so that |comparator| orders them.
   Fewer than 13 items are handled by a plain insertion sort; larger inputs
   use a Shell sort over kBrotliShellGaps, skipping the two largest gaps when
   n is below 57. */
static BROTLI_INLINE void SortHuffmanTreeItems(HuffmanTree* items,
    const size_t n, HuffmanTreeComparator comparator) {
  if (n < 13) {
    /* Insertion sort. */
    size_t i;
    for (i = 1; i < n; ++i) {
      HuffmanTree moving = items[i];
      size_t slot = i;
      while (slot != 0 && comparator(&moving, &items[slot - 1])) {
        items[slot] = items[slot - 1];
        --slot;
      }
      items[slot] = moving;
    }
    return;
  }
  {
    /* Shell sort. */
    size_t g;
    for (g = (n < 57) ? 2 : 0; g < 6; ++g) {
      const size_t gap = kBrotliShellGaps[g];
      size_t i;
      for (i = gap; i < n; ++i) {
        HuffmanTree moving = items[i];
        size_t slot = i;
        while (slot >= gap && comparator(&moving, &items[slot - gap])) {
          items[slot] = items[slot - gap];
          slot -= gap;
        }
        items[slot] = moving;
      }
    }
  }
}
|
117
|
+
|
118
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
119
|
+
} /* extern "C" */
|
120
|
+
#endif
|
121
|
+
|
122
|
+
#endif /* BROTLI_ENC_ENTROPY_ENCODE_H_ */
|