isomorfeus-ferret 0.12.6 → 0.12.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -4
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
- data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
- data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
- data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
- data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
- data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
- data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
- data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
- data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +88 -3
@@ -0,0 +1,95 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Function to find backward reference copies. */
|
8
|
+
|
9
|
+
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
|
10
|
+
#define BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
|
11
|
+
|
12
|
+
#include "brotli_common_constants.h"
|
13
|
+
#include "brotli_common_context.h"
|
14
|
+
#include "brotli_common_dictionary.h"
|
15
|
+
#include "brotli_common_platform.h"
|
16
|
+
#include "brotli_types.h"
|
17
|
+
#include "brotli_enc_command.h"
|
18
|
+
#include "brotli_enc_hash.h"
|
19
|
+
#include "brotli_enc_memory.h"
|
20
|
+
#include "brotli_enc_quality.h"
|
21
|
+
|
22
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
23
|
+
extern "C" {
|
24
|
+
#endif
|
25
|
+
|
26
|
+
BROTLI_INTERNAL void BrotliCreateZopfliBackwardReferences(MemoryManager* m,
|
27
|
+
size_t num_bytes,
|
28
|
+
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
29
|
+
ContextLut literal_context_lut, const BrotliEncoderParams* params,
|
30
|
+
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
|
31
|
+
Command* commands, size_t* num_commands, size_t* num_literals);
|
32
|
+
|
33
|
+
BROTLI_INTERNAL void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m,
|
34
|
+
size_t num_bytes,
|
35
|
+
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
36
|
+
ContextLut literal_context_lut, const BrotliEncoderParams* params,
|
37
|
+
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
|
38
|
+
Command* commands, size_t* num_commands, size_t* num_literals);
|
39
|
+
|
40
|
+
/* Per-position node used by the Zopfli-style shortest-path search declared in
   this header. The three leading fields pack the copy length, the copy
   distance and the insert length together with auxiliary code bits; the
   trailing union is reused across the phases of the search. */
typedef struct ZopfliNode {
  /* Best length to get up to this byte (not including this byte itself);
     the highest 7 bits are used to reconstruct the length code. */
  uint32_t length;
  /* Distance associated with the length. */
  uint32_t distance;
  /* Number of literal inserts before this copy; the highest 5 bits contain
     distance short code + 1 (or zero if there is no short code). */
  uint32_t dcode_insert_length;

  /* This union holds information used by dynamic programming. During the
     forward pass |cost| is used to store the goal function. When a node is
     processed its |cost| is invalidated in favor of |shortcut|. On the path
     back-tracing pass |next| is assigned the offset to the next node on the
     path. Only one member is meaningful at any given phase. */
  union {
    /* Smallest cost to get to this byte from the beginning, as found so far. */
    float cost;
    /* Offset to the next node on the path. Equals the command_length() of the
       next node on the path. For the last node it equals BROTLI_UINT32_MAX. */
    uint32_t next;
    /* Node position that provides the next distance for the distance cache. */
    uint32_t shortcut;
  } u;
} ZopfliNode;
|
64
|
+
|
65
|
+
BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length);
|
66
|
+
|
67
|
+
/* Computes the shortest path of commands from position to at most
|
68
|
+
position + num_bytes.
|
69
|
+
|
70
|
+
On return, path->size() is the number of commands found and path[i] is the
|
71
|
+
length of the i-th command (copy length plus insert length).
|
72
|
+
Note that the sum of the lengths of all commands can be less than num_bytes.
|
73
|
+
|
74
|
+
On return, the nodes[0..num_bytes] array will have the following
|
75
|
+
"ZopfliNode array invariant":
|
76
|
+
For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
|
77
|
+
(1) nodes[i].copy_length() >= 2
|
78
|
+
(2) nodes[i].command_length() <= i and
|
79
|
+
(3) nodes[i - nodes[i].command_length()].cost < kInfinity */
|
80
|
+
BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(
|
81
|
+
MemoryManager* m, size_t num_bytes,
|
82
|
+
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
83
|
+
ContextLut literal_context_lut, const BrotliEncoderParams* params,
|
84
|
+
const int* dist_cache, Hasher* hasher, ZopfliNode* nodes);
|
85
|
+
|
86
|
+
BROTLI_INTERNAL void BrotliZopfliCreateCommands(
|
87
|
+
const size_t num_bytes, const size_t block_start, const ZopfliNode* nodes,
|
88
|
+
int* dist_cache, size_t* last_insert_len, const BrotliEncoderParams* params,
|
89
|
+
Command* commands, size_t* num_literals);
|
90
|
+
|
91
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
92
|
+
} /* extern "C" */
|
93
|
+
#endif
|
94
|
+
|
95
|
+
#endif /* BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_ */
|
@@ -0,0 +1,163 @@
|
|
1
|
+
/* NOLINT(build/header_guard) */
|
2
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
3
|
+
|
4
|
+
Distributed under MIT license.
|
5
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
6
|
+
*/
|
7
|
+
|
8
|
+
/* template parameters: EXPORT_FN, FN */
|
9
|
+
|
10
|
+
static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
|
11
|
+
size_t num_bytes, size_t position,
|
12
|
+
const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
13
|
+
ContextLut literal_context_lut, const BrotliEncoderParams* params,
|
14
|
+
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
|
15
|
+
Command* commands, size_t* num_commands, size_t* num_literals) {
|
16
|
+
HASHER()* privat = &hasher->privat.FN(_);
|
17
|
+
/* Set maximum distance, see section 9.1. of the spec. */
|
18
|
+
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
|
19
|
+
const size_t position_offset = params->stream_offset;
|
20
|
+
|
21
|
+
const Command* const orig_commands = commands;
|
22
|
+
size_t insert_length = *last_insert_len;
|
23
|
+
const size_t pos_end = position + num_bytes;
|
24
|
+
const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
|
25
|
+
position + num_bytes - FN(StoreLookahead)() + 1 : position;
|
26
|
+
|
27
|
+
/* For speed up heuristics for random data. */
|
28
|
+
const size_t random_heuristics_window_size =
|
29
|
+
LiteralSpreeLengthForSparseSearch(params);
|
30
|
+
size_t apply_random_heuristics = position + random_heuristics_window_size;
|
31
|
+
const size_t gap = 0;
|
32
|
+
|
33
|
+
/* Minimum score to accept a backward reference. */
|
34
|
+
const score_t kMinScore = BROTLI_SCORE_BASE + 100;
|
35
|
+
|
36
|
+
BROTLI_UNUSED(literal_context_lut);
|
37
|
+
|
38
|
+
FN(PrepareDistanceCache)(privat, dist_cache);
|
39
|
+
|
40
|
+
while (position + FN(HashTypeLength)() < pos_end) {
|
41
|
+
size_t max_length = pos_end - position;
|
42
|
+
size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
|
43
|
+
size_t dictionary_start = BROTLI_MIN(size_t,
|
44
|
+
position + position_offset, max_backward_limit);
|
45
|
+
HasherSearchResult sr;
|
46
|
+
sr.len = 0;
|
47
|
+
sr.len_code_delta = 0;
|
48
|
+
sr.distance = 0;
|
49
|
+
sr.score = kMinScore;
|
50
|
+
FN(FindLongestMatch)(privat, ¶ms->dictionary,
|
51
|
+
ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
|
52
|
+
max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
|
53
|
+
if (sr.score > kMinScore) {
|
54
|
+
/* Found a match. Let's look for something even better ahead. */
|
55
|
+
int delayed_backward_references_in_row = 0;
|
56
|
+
--max_length;
|
57
|
+
for (;; --max_length) {
|
58
|
+
const score_t cost_diff_lazy = 175;
|
59
|
+
HasherSearchResult sr2;
|
60
|
+
sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
|
61
|
+
BROTLI_MIN(size_t, sr.len - 1, max_length) : 0;
|
62
|
+
sr2.len_code_delta = 0;
|
63
|
+
sr2.distance = 0;
|
64
|
+
sr2.score = kMinScore;
|
65
|
+
max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
|
66
|
+
dictionary_start = BROTLI_MIN(size_t,
|
67
|
+
position + 1 + position_offset, max_backward_limit);
|
68
|
+
FN(FindLongestMatch)(privat,
|
69
|
+
¶ms->dictionary,
|
70
|
+
ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
|
71
|
+
max_distance, dictionary_start + gap, params->dist.max_distance,
|
72
|
+
&sr2);
|
73
|
+
if (sr2.score >= sr.score + cost_diff_lazy) {
|
74
|
+
/* Ok, let's just write one byte for now and start a match from the
|
75
|
+
next byte. */
|
76
|
+
++position;
|
77
|
+
++insert_length;
|
78
|
+
sr = sr2;
|
79
|
+
if (++delayed_backward_references_in_row < 4 &&
|
80
|
+
position + FN(HashTypeLength)() < pos_end) {
|
81
|
+
continue;
|
82
|
+
}
|
83
|
+
}
|
84
|
+
break;
|
85
|
+
}
|
86
|
+
apply_random_heuristics =
|
87
|
+
position + 2 * sr.len + random_heuristics_window_size;
|
88
|
+
dictionary_start = BROTLI_MIN(size_t,
|
89
|
+
position + position_offset, max_backward_limit);
|
90
|
+
{
|
91
|
+
/* The first 16 codes are special short-codes,
|
92
|
+
and the minimum offset is 1. */
|
93
|
+
size_t distance_code = ComputeDistanceCode(
|
94
|
+
sr.distance, dictionary_start + gap, dist_cache);
|
95
|
+
if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) {
|
96
|
+
dist_cache[3] = dist_cache[2];
|
97
|
+
dist_cache[2] = dist_cache[1];
|
98
|
+
dist_cache[1] = dist_cache[0];
|
99
|
+
dist_cache[0] = (int)sr.distance;
|
100
|
+
FN(PrepareDistanceCache)(privat, dist_cache);
|
101
|
+
}
|
102
|
+
InitCommand(commands++, ¶ms->dist, insert_length,
|
103
|
+
sr.len, sr.len_code_delta, distance_code);
|
104
|
+
}
|
105
|
+
*num_literals += insert_length;
|
106
|
+
insert_length = 0;
|
107
|
+
/* Put the hash keys into the table, if there are enough bytes left.
|
108
|
+
Depending on the hasher implementation, it can push all positions
|
109
|
+
in the given range or only a subset of them.
|
110
|
+
Avoid hash poisoning with RLE data. */
|
111
|
+
{
|
112
|
+
size_t range_start = position + 2;
|
113
|
+
size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
|
114
|
+
if (sr.distance < (sr.len >> 2)) {
|
115
|
+
range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
|
116
|
+
range_start, position + sr.len - (sr.distance << 2)));
|
117
|
+
}
|
118
|
+
FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
|
119
|
+
range_end);
|
120
|
+
}
|
121
|
+
position += sr.len;
|
122
|
+
} else {
|
123
|
+
++insert_length;
|
124
|
+
++position;
|
125
|
+
/* If we have not seen matches for a long time, we can skip some
|
126
|
+
match lookups. Unsuccessful match lookups are very very expensive
|
127
|
+
and this kind of a heuristic speeds up compression quite
|
128
|
+
a lot. */
|
129
|
+
if (position > apply_random_heuristics) {
|
130
|
+
/* Going through uncompressible data, jump. */
|
131
|
+
if (position >
|
132
|
+
apply_random_heuristics + 4 * random_heuristics_window_size) {
|
133
|
+
/* It is quite a long time since we saw a copy, so we assume
|
134
|
+
that this data is not compressible, and store hashes less
|
135
|
+
often. Hashes of non compressible data are less likely to
|
136
|
+
turn out to be useful in the future, too, so we store less of
|
137
|
+
them to not to flood out the hash table of good compressible
|
138
|
+
data. */
|
139
|
+
const size_t kMargin =
|
140
|
+
BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
|
141
|
+
size_t pos_jump =
|
142
|
+
BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
|
143
|
+
for (; position < pos_jump; position += 4) {
|
144
|
+
FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
|
145
|
+
insert_length += 4;
|
146
|
+
}
|
147
|
+
} else {
|
148
|
+
const size_t kMargin =
|
149
|
+
BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
|
150
|
+
size_t pos_jump =
|
151
|
+
BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
|
152
|
+
for (; position < pos_jump; position += 2) {
|
153
|
+
FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
|
154
|
+
insert_length += 2;
|
155
|
+
}
|
156
|
+
}
|
157
|
+
}
|
158
|
+
}
|
159
|
+
}
|
160
|
+
insert_length += pos_end - position;
|
161
|
+
*last_insert_len = insert_length;
|
162
|
+
*num_commands += (size_t)(commands - orig_commands);
|
163
|
+
}
|
@@ -0,0 +1,35 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Functions to estimate the bit cost of Huffman trees. */
|
8
|
+
|
9
|
+
#include "brotli_enc_bit_cost.h"
|
10
|
+
|
11
|
+
#include "brotli_common_constants.h"
|
12
|
+
#include "brotli_common_platform.h"
|
13
|
+
#include "brotli_types.h"
|
14
|
+
#include "brotli_enc_fast_log.h"
|
15
|
+
#include "brotli_enc_histogram.h"
|
16
|
+
|
17
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
18
|
+
extern "C" {
|
19
|
+
#endif
|
20
|
+
|
21
|
+
#define FN(X) X ## Literal
|
22
|
+
#include "brotli_enc_bit_cost_inc.h" /* NOLINT(build/include) */
|
23
|
+
#undef FN
|
24
|
+
|
25
|
+
#define FN(X) X ## Command
|
26
|
+
#include "brotli_enc_bit_cost_inc.h" /* NOLINT(build/include) */
|
27
|
+
#undef FN
|
28
|
+
|
29
|
+
#define FN(X) X ## Distance
|
30
|
+
#include "brotli_enc_bit_cost_inc.h" /* NOLINT(build/include) */
|
31
|
+
#undef FN
|
32
|
+
|
33
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
34
|
+
} /* extern "C" */
|
35
|
+
#endif
|
@@ -0,0 +1,63 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Functions to estimate the bit cost of Huffman trees. */
|
8
|
+
|
9
|
+
#ifndef BROTLI_ENC_BIT_COST_H_
|
10
|
+
#define BROTLI_ENC_BIT_COST_H_
|
11
|
+
|
12
|
+
#include "brotli_common_platform.h"
|
13
|
+
#include "brotli_types.h"
|
14
|
+
#include "brotli_enc_fast_log.h"
|
15
|
+
#include "brotli_enc_histogram.h"
|
16
|
+
|
17
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
18
|
+
extern "C" {
|
19
|
+
#endif
|
20
|
+
|
21
|
+
/* Computes sum * FastLog2(sum) - SUM(p * FastLog2(p)) over the |size| counts
   in |population|, where sum is the total of all counts, and stores that
   total in |*total|. When every count is zero the result is 0 and |*total|
   is 0.
   NOTE(review): zero counts contribute 0 * FastLog2(0); this relies on
   FastLog2(0) being finite (FastLog2 is defined in brotli_enc_fast_log.h,
   not visible here) -- confirm. */
static BROTLI_INLINE double ShannonEntropy(
    const uint32_t* population, size_t size, size_t* total) {
  size_t sum = 0;
  double retval = 0;
  const uint32_t* population_end = population + size;
  size_t p;
  /* The loop body handles two elements per iteration. For an odd |size| the
     first pass enters at the second half so that exactly |size| elements are
     consumed. */
  if (size & 1) {
    goto odd_number_of_elements_left;
  }
  while (population < population_end) {
    p = *population++;
    sum += p;
    retval -= (double)p * FastLog2(p);
 odd_number_of_elements_left:
    p = *population++;
    sum += p;
    retval -= (double)p * FastLog2(p);
  }
  if (sum) retval += (double)sum * FastLog2(sum);
  *total = sum;
  return retval;
}
|
43
|
+
|
44
|
+
/* Returns ShannonEntropy() of the |size| counts in |population|, clamped from
   below by the total count, i.e. never less than one bit per counted
   symbol. */
static BROTLI_INLINE double BitsEntropy(
    const uint32_t* population, size_t size) {
  size_t sum;
  double retval = ShannonEntropy(population, size, &sum);
  /* Same comparison as the implicit size_t -> double conversion, spelled
     out explicitly. */
  if (retval < (double)sum) {
    /* At least one bit per literal is needed. */
    retval = (double)sum;
  }
  return retval;
}
|
54
|
+
|
55
|
+
BROTLI_INTERNAL double BrotliPopulationCostLiteral(const HistogramLiteral*);
|
56
|
+
BROTLI_INTERNAL double BrotliPopulationCostCommand(const HistogramCommand*);
|
57
|
+
BROTLI_INTERNAL double BrotliPopulationCostDistance(const HistogramDistance*);
|
58
|
+
|
59
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
60
|
+
} /* extern "C" */
|
61
|
+
#endif
|
62
|
+
|
63
|
+
#endif /* BROTLI_ENC_BIT_COST_H_ */
|
@@ -0,0 +1,127 @@
|
|
1
|
+
/* NOLINT(build/header_guard) */
|
2
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
3
|
+
|
4
|
+
Distributed under MIT license.
|
5
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
6
|
+
*/
|
7
|
+
|
8
|
+
/* template parameters: FN */
|
9
|
+
|
10
|
+
#define HistogramType FN(Histogram)
|
11
|
+
|
12
|
+
double FN(BrotliPopulationCost)(const HistogramType* histogram) {
|
13
|
+
static const double kOneSymbolHistogramCost = 12;
|
14
|
+
static const double kTwoSymbolHistogramCost = 20;
|
15
|
+
static const double kThreeSymbolHistogramCost = 28;
|
16
|
+
static const double kFourSymbolHistogramCost = 37;
|
17
|
+
const size_t data_size = FN(HistogramDataSize)();
|
18
|
+
int count = 0;
|
19
|
+
size_t s[5];
|
20
|
+
double bits = 0.0;
|
21
|
+
size_t i;
|
22
|
+
if (histogram->total_count_ == 0) {
|
23
|
+
return kOneSymbolHistogramCost;
|
24
|
+
}
|
25
|
+
for (i = 0; i < data_size; ++i) {
|
26
|
+
if (histogram->data_[i] > 0) {
|
27
|
+
s[count] = i;
|
28
|
+
++count;
|
29
|
+
if (count > 4) break;
|
30
|
+
}
|
31
|
+
}
|
32
|
+
if (count == 1) {
|
33
|
+
return kOneSymbolHistogramCost;
|
34
|
+
}
|
35
|
+
if (count == 2) {
|
36
|
+
return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
|
37
|
+
}
|
38
|
+
if (count == 3) {
|
39
|
+
const uint32_t histo0 = histogram->data_[s[0]];
|
40
|
+
const uint32_t histo1 = histogram->data_[s[1]];
|
41
|
+
const uint32_t histo2 = histogram->data_[s[2]];
|
42
|
+
const uint32_t histomax =
|
43
|
+
BROTLI_MAX(uint32_t, histo0, BROTLI_MAX(uint32_t, histo1, histo2));
|
44
|
+
return (kThreeSymbolHistogramCost +
|
45
|
+
2 * (histo0 + histo1 + histo2) - histomax);
|
46
|
+
}
|
47
|
+
if (count == 4) {
|
48
|
+
uint32_t histo[4];
|
49
|
+
uint32_t h23;
|
50
|
+
uint32_t histomax;
|
51
|
+
for (i = 0; i < 4; ++i) {
|
52
|
+
histo[i] = histogram->data_[s[i]];
|
53
|
+
}
|
54
|
+
/* Sort */
|
55
|
+
for (i = 0; i < 4; ++i) {
|
56
|
+
size_t j;
|
57
|
+
for (j = i + 1; j < 4; ++j) {
|
58
|
+
if (histo[j] > histo[i]) {
|
59
|
+
BROTLI_SWAP(uint32_t, histo, j, i);
|
60
|
+
}
|
61
|
+
}
|
62
|
+
}
|
63
|
+
h23 = histo[2] + histo[3];
|
64
|
+
histomax = BROTLI_MAX(uint32_t, h23, histo[0]);
|
65
|
+
return (kFourSymbolHistogramCost +
|
66
|
+
3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
|
67
|
+
}
|
68
|
+
|
69
|
+
{
|
70
|
+
/* In this loop we compute the entropy of the histogram and simultaneously
|
71
|
+
build a simplified histogram of the code length codes where we use the
|
72
|
+
zero repeat code 17, but we don't use the non-zero repeat code 16. */
|
73
|
+
size_t max_depth = 1;
|
74
|
+
uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
|
75
|
+
const double log2total = FastLog2(histogram->total_count_);
|
76
|
+
for (i = 0; i < data_size;) {
|
77
|
+
if (histogram->data_[i] > 0) {
|
78
|
+
/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
|
79
|
+
= log2(total_count) - log2(count(symbol)) */
|
80
|
+
double log2p = log2total - FastLog2(histogram->data_[i]);
|
81
|
+
/* Approximate the bit depth by round(-log2(P(symbol))) */
|
82
|
+
size_t depth = (size_t)(log2p + 0.5);
|
83
|
+
bits += histogram->data_[i] * log2p;
|
84
|
+
if (depth > 15) {
|
85
|
+
depth = 15;
|
86
|
+
}
|
87
|
+
if (depth > max_depth) {
|
88
|
+
max_depth = depth;
|
89
|
+
}
|
90
|
+
++depth_histo[depth];
|
91
|
+
++i;
|
92
|
+
} else {
|
93
|
+
/* Compute the run length of zeros and add the appropriate number of 0
|
94
|
+
and 17 code length codes to the code length code histogram. */
|
95
|
+
uint32_t reps = 1;
|
96
|
+
size_t k;
|
97
|
+
for (k = i + 1; k < data_size && histogram->data_[k] == 0; ++k) {
|
98
|
+
++reps;
|
99
|
+
}
|
100
|
+
i += reps;
|
101
|
+
if (i == data_size) {
|
102
|
+
/* Don't add any cost for the last zero run, since these are encoded
|
103
|
+
only implicitly. */
|
104
|
+
break;
|
105
|
+
}
|
106
|
+
if (reps < 3) {
|
107
|
+
depth_histo[0] += reps;
|
108
|
+
} else {
|
109
|
+
reps -= 2;
|
110
|
+
while (reps > 0) {
|
111
|
+
++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
|
112
|
+
/* Add the 3 extra bits for the 17 code length code. */
|
113
|
+
bits += 3;
|
114
|
+
reps >>= 3;
|
115
|
+
}
|
116
|
+
}
|
117
|
+
}
|
118
|
+
}
|
119
|
+
/* Add the estimated encoding cost of the code length code histogram. */
|
120
|
+
bits += (double)(18 + 2 * max_depth);
|
121
|
+
/* Add the entropy of the code length code histogram. */
|
122
|
+
bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
|
123
|
+
}
|
124
|
+
return bits;
|
125
|
+
}
|
126
|
+
|
127
|
+
#undef HistogramType
|
@@ -0,0 +1,34 @@
|
|
1
|
+
/* NOLINT(build/header_guard) */
|
2
|
+
/* Copyright 2014 Google Inc. All Rights Reserved.
|
3
|
+
|
4
|
+
Distributed under MIT license.
|
5
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
6
|
+
*/
|
7
|
+
|
8
|
+
/* template parameters: FN */
|
9
|
+
|
10
|
+
#define HistogramType FN(Histogram)
|
11
|
+
|
12
|
+
/* Creates entropy codes for all block types and stores them to the bit
|
13
|
+
stream. */
|
14
|
+
static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self,
|
15
|
+
const HistogramType* histograms, const size_t histograms_size,
|
16
|
+
const size_t alphabet_size, HuffmanTree* tree,
|
17
|
+
size_t* storage_ix, uint8_t* storage) {
|
18
|
+
const size_t table_size = histograms_size * self->histogram_length_;
|
19
|
+
self->depths_ = BROTLI_ALLOC(m, uint8_t, table_size);
|
20
|
+
self->bits_ = BROTLI_ALLOC(m, uint16_t, table_size);
|
21
|
+
if (BROTLI_IS_OOM(m)) return;
|
22
|
+
|
23
|
+
{
|
24
|
+
size_t i;
|
25
|
+
for (i = 0; i < histograms_size; ++i) {
|
26
|
+
size_t ix = i * self->histogram_length_;
|
27
|
+
BuildAndStoreHuffmanTree(&histograms[i].data_[0], self->histogram_length_,
|
28
|
+
alphabet_size, tree, &self->depths_[ix], &self->bits_[ix],
|
29
|
+
storage_ix, storage);
|
30
|
+
}
|
31
|
+
}
|
32
|
+
}
|
33
|
+
|
34
|
+
#undef HistogramType
|