brotli 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.travis.yml +11 -3
  4. data/Gemfile +2 -0
  5. data/ext/brotli/brotli.c +279 -0
  6. data/ext/brotli/brotli.h +2 -0
  7. data/ext/brotli/buffer.c +95 -0
  8. data/ext/brotli/buffer.h +19 -0
  9. data/ext/brotli/extconf.rb +21 -81
  10. data/lib/brotli/version.rb +1 -1
  11. data/vendor/brotli/dec/bit_reader.c +5 -5
  12. data/vendor/brotli/dec/bit_reader.h +15 -15
  13. data/vendor/brotli/dec/context.h +1 -1
  14. data/vendor/brotli/dec/decode.c +433 -348
  15. data/vendor/brotli/dec/decode.h +74 -48
  16. data/vendor/brotli/dec/huffman.c +5 -4
  17. data/vendor/brotli/dec/huffman.h +4 -4
  18. data/vendor/brotli/dec/port.h +2 -95
  19. data/vendor/brotli/dec/prefix.h +5 -3
  20. data/vendor/brotli/dec/state.c +15 -27
  21. data/vendor/brotli/dec/state.h +21 -17
  22. data/vendor/brotli/dec/transform.h +1 -1
  23. data/vendor/brotli/enc/backward_references.c +892 -0
  24. data/vendor/brotli/enc/backward_references.h +85 -102
  25. data/vendor/brotli/enc/backward_references_inc.h +147 -0
  26. data/vendor/brotli/enc/bit_cost.c +35 -0
  27. data/vendor/brotli/enc/bit_cost.h +23 -121
  28. data/vendor/brotli/enc/bit_cost_inc.h +127 -0
  29. data/vendor/brotli/enc/block_encoder_inc.h +33 -0
  30. data/vendor/brotli/enc/block_splitter.c +197 -0
  31. data/vendor/brotli/enc/block_splitter.h +40 -50
  32. data/vendor/brotli/enc/block_splitter_inc.h +432 -0
  33. data/vendor/brotli/enc/brotli_bit_stream.c +1334 -0
  34. data/vendor/brotli/enc/brotli_bit_stream.h +95 -167
  35. data/vendor/brotli/enc/cluster.c +56 -0
  36. data/vendor/brotli/enc/cluster.h +23 -305
  37. data/vendor/brotli/enc/cluster_inc.h +315 -0
  38. data/vendor/brotli/enc/command.h +83 -76
  39. data/vendor/brotli/enc/compress_fragment.c +747 -0
  40. data/vendor/brotli/enc/compress_fragment.h +48 -37
  41. data/vendor/brotli/enc/compress_fragment_two_pass.c +557 -0
  42. data/vendor/brotli/enc/compress_fragment_two_pass.h +37 -26
  43. data/vendor/brotli/enc/compressor.cc +139 -0
  44. data/vendor/brotli/enc/compressor.h +146 -0
  45. data/vendor/brotli/enc/context.h +102 -96
  46. data/vendor/brotli/enc/dictionary_hash.h +9 -5
  47. data/vendor/brotli/enc/encode.c +1562 -0
  48. data/vendor/brotli/enc/encode.h +211 -199
  49. data/vendor/brotli/enc/encode_parallel.cc +161 -151
  50. data/vendor/brotli/enc/encode_parallel.h +7 -8
  51. data/vendor/brotli/enc/entropy_encode.c +501 -0
  52. data/vendor/brotli/enc/entropy_encode.h +107 -89
  53. data/vendor/brotli/enc/entropy_encode_static.h +29 -62
  54. data/vendor/brotli/enc/fast_log.h +26 -20
  55. data/vendor/brotli/enc/find_match_length.h +23 -20
  56. data/vendor/brotli/enc/hash.h +614 -871
  57. data/vendor/brotli/enc/hash_forgetful_chain_inc.h +249 -0
  58. data/vendor/brotli/enc/hash_longest_match_inc.h +241 -0
  59. data/vendor/brotli/enc/hash_longest_match_quickly_inc.h +230 -0
  60. data/vendor/brotli/enc/histogram.c +95 -0
  61. data/vendor/brotli/enc/histogram.h +49 -83
  62. data/vendor/brotli/enc/histogram_inc.h +51 -0
  63. data/vendor/brotli/enc/literal_cost.c +178 -0
  64. data/vendor/brotli/enc/literal_cost.h +16 -10
  65. data/vendor/brotli/enc/memory.c +181 -0
  66. data/vendor/brotli/enc/memory.h +62 -0
  67. data/vendor/brotli/enc/metablock.c +515 -0
  68. data/vendor/brotli/enc/metablock.h +87 -57
  69. data/vendor/brotli/enc/metablock_inc.h +183 -0
  70. data/vendor/brotli/enc/port.h +73 -47
  71. data/vendor/brotli/enc/prefix.h +34 -61
  72. data/vendor/brotli/enc/quality.h +130 -0
  73. data/vendor/brotli/enc/ringbuffer.h +137 -122
  74. data/vendor/brotli/enc/{static_dict.cc → static_dict.c} +162 -139
  75. data/vendor/brotli/enc/static_dict.h +23 -18
  76. data/vendor/brotli/enc/static_dict_lut.h +11223 -12037
  77. data/vendor/brotli/enc/streams.cc +7 -7
  78. data/vendor/brotli/enc/streams.h +32 -32
  79. data/vendor/brotli/enc/{utf8_util.cc → utf8_util.c} +22 -20
  80. data/vendor/brotli/enc/utf8_util.h +16 -9
  81. data/vendor/brotli/enc/write_bits.h +49 -43
  82. metadata +34 -25
  83. data/ext/brotli/brotli.cc +0 -181
  84. data/vendor/brotli/dec/Makefile +0 -12
  85. data/vendor/brotli/dec/dictionary.c +0 -9466
  86. data/vendor/brotli/dec/dictionary.h +0 -38
  87. data/vendor/brotli/dec/types.h +0 -38
  88. data/vendor/brotli/enc/Makefile +0 -14
  89. data/vendor/brotli/enc/backward_references.cc +0 -858
  90. data/vendor/brotli/enc/block_splitter.cc +0 -505
  91. data/vendor/brotli/enc/brotli_bit_stream.cc +0 -1181
  92. data/vendor/brotli/enc/compress_fragment.cc +0 -701
  93. data/vendor/brotli/enc/compress_fragment_two_pass.cc +0 -524
  94. data/vendor/brotli/enc/dictionary.cc +0 -9466
  95. data/vendor/brotli/enc/dictionary.h +0 -41
  96. data/vendor/brotli/enc/encode.cc +0 -1180
  97. data/vendor/brotli/enc/entropy_encode.cc +0 -480
  98. data/vendor/brotli/enc/histogram.cc +0 -67
  99. data/vendor/brotli/enc/literal_cost.cc +0 -165
  100. data/vendor/brotli/enc/metablock.cc +0 -539
  101. data/vendor/brotli/enc/transform.h +0 -248
  102. data/vendor/brotli/enc/types.h +0 -29
@@ -4,113 +4,96 @@
4
4
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
5
  */
6
6
 
7
- // Function to find backward reference copies.
7
+ /* Function to find backward reference copies. */
8
8
 
9
9
  #ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
10
10
  #define BROTLI_ENC_BACKWARD_REFERENCES_H_
11
11
 
12
- #include <vector>
13
-
14
- #include "./hash.h"
12
+ #include "../common/types.h"
15
13
  #include "./command.h"
16
- #include "./types.h"
17
-
18
- namespace brotli {
19
-
20
- // "commands" points to the next output command to write to, "*num_commands" is
21
- // initially the total amount of commands output by previous
22
- // CreateBackwardReferences calls, and must be incremented by the amount written
23
- // by this call.
24
- void CreateBackwardReferences(size_t num_bytes,
25
- size_t position,
26
- bool is_last,
27
- const uint8_t* ringbuffer,
28
- size_t ringbuffer_mask,
29
- const int quality,
30
- const int lgwin,
31
- Hashers* hashers,
32
- int hash_type,
33
- int* dist_cache,
34
- size_t* last_insert_len,
35
- Command* commands,
36
- size_t* num_commands,
37
- size_t* num_literals);
38
-
39
- static const float kInfinity = std::numeric_limits<float>::infinity();
40
-
41
- struct ZopfliNode {
42
- ZopfliNode(void) : length(1),
43
- distance(0),
44
- insert_length(0),
45
- cost(kInfinity) {}
46
-
47
- inline uint32_t copy_length() const {
48
- return length & 0xffffff;
49
- }
50
-
51
- inline uint32_t length_code() const {
52
- const uint32_t modifier = length >> 24;
53
- return copy_length() + 9u - modifier;
54
- }
55
-
56
- inline uint32_t copy_distance() const {
57
- return distance & 0x1ffffff;
58
- }
59
-
60
- inline uint32_t distance_code() const {
61
- const uint32_t short_code = distance >> 25;
62
- return short_code == 0 ? copy_distance() + 15 : short_code - 1;
63
- }
64
-
65
- inline uint32_t command_length() const {
66
- return copy_length() + insert_length;
67
- }
68
-
69
- // best length to get up to this byte (not including this byte itself)
70
- // highest 8 bit is used to reconstruct the length code
14
+ #include "./hash.h"
15
+ #include "./memory.h"
16
+ #include "./port.h"
17
+ #include "./quality.h"
18
+
19
+ #if defined(__cplusplus) || defined(c_plusplus)
20
+ extern "C" {
21
+ #endif
22
+
23
+ /* "commands" points to the next output command to write to, "*num_commands" is
24
+ initially the total amount of commands output by previous
25
+ CreateBackwardReferences calls, and must be incremented by the amount written
26
+ by this call. */
27
+ BROTLI_INTERNAL void BrotliCreateBackwardReferences(
28
+ MemoryManager* m, size_t num_bytes, size_t position, BROTLI_BOOL is_last,
29
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
30
+ const BrotliEncoderParams* params, Hashers* hashers, int* dist_cache,
31
+ size_t* last_insert_len, Command* commands, size_t* num_commands,
32
+ size_t* num_literals);
33
+
34
+ typedef struct ZopfliNode {
35
+ /* best length to get up to this byte (not including this byte itself)
36
+ the highest 8 bits are used to reconstruct the length code */
71
37
  uint32_t length;
72
- // distance associated with the length
73
- // highest 7 bit contains distance short code + 1 (or zero if no short code)
38
+ /* distance associated with the length
39
+ the highest 7 bits contain distance short code + 1 (or zero if no short code)
40
+ */
74
41
  uint32_t distance;
75
- // number of literal inserts before this copy
42
+ /* number of literal inserts before this copy */
76
43
  uint32_t insert_length;
77
- // smallest cost to get to this byte from the beginning, as found so far
78
- float cost;
79
- };
80
-
81
- // Computes the shortest path of commands from position to at most
82
- // position + num_bytes.
83
- //
84
- // On return, path->size() is the number of commands found and path[i] is the
85
- // length of the ith command (copy length plus insert length).
86
- // Note that the sum of the lengths of all commands can be less than num_bytes.
87
- //
88
- // On return, the nodes[0..num_bytes] array will have the following
89
- // "ZopfliNode array invariant":
90
- // For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
91
- // (1) nodes[i].copy_length() >= 2
92
- // (2) nodes[i].command_length() <= i and
93
- // (3) nodes[i - nodes[i].command_length()].cost < kInfinity
94
- void ZopfliComputeShortestPath(size_t num_bytes,
95
- size_t position,
96
- const uint8_t* ringbuffer,
97
- size_t ringbuffer_mask,
98
- const size_t max_backward_limit,
99
- const int* dist_cache,
100
- Hashers::H10* hasher,
101
- ZopfliNode* nodes,
102
- std::vector<uint32_t>* path);
103
-
104
- void ZopfliCreateCommands(const size_t num_bytes,
105
- const size_t block_start,
106
- const size_t max_backward_limit,
107
- const std::vector<uint32_t>& path,
108
- const ZopfliNode* nodes,
109
- int* dist_cache,
110
- size_t* last_insert_len,
111
- Command* commands,
112
- size_t* num_literals);
113
-
114
- } // namespace brotli
115
-
116
- #endif // BROTLI_ENC_BACKWARD_REFERENCES_H_
44
+
45
+ /* This union holds information used by dynamic-programming. During forward
46
+ pass |cost| is used to store the goal function. When node is processed its
47
+ |cost| is invalidated in favor of |shortcut|. On path backtracing pass
48
+ |next| is assigned the offset to next node on the path. */
49
+ union {
50
+ /* Smallest cost to get to this byte from the beginning, as found so far. */
51
+ float cost;
52
+ /* Offset to the next node on the path. Equal to command_length() of the
53
+ next node on the path. For the last node, equals BROTLI_UINT32_MAX. */
54
+ uint32_t next;
55
+ /* Node position that provides next distance for distance cache. */
56
+ uint32_t shortcut;
57
+ } u;
58
+ } ZopfliNode;
59
+
60
+ BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length);
61
+
62
+ /* Computes the shortest path of commands from position to at most
63
+ position + num_bytes.
64
+
65
+ On return, path->size() is the number of commands found and path[i] is the
66
+ length of the ith command (copy length plus insert length).
67
+ Note that the sum of the lengths of all commands can be less than num_bytes.
68
+
69
+ On return, the nodes[0..num_bytes] array will have the following
70
+ "ZopfliNode array invariant":
71
+ For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
72
+ (1) nodes[i].copy_length() >= 2
73
+ (2) nodes[i].command_length() <= i and
74
+ (3) nodes[i - nodes[i].command_length()].cost < kInfinity */
75
+ BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(
76
+ MemoryManager* m, size_t num_bytes, size_t position,
77
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
78
+ const BrotliEncoderParams* params, const size_t max_backward_limit,
79
+ const int* dist_cache, H10* hasher, ZopfliNode* nodes);
80
+
81
+ BROTLI_INTERNAL void BrotliZopfliCreateCommands(const size_t num_bytes,
82
+ const size_t block_start,
83
+ const size_t max_backward_limit,
84
+ const ZopfliNode* nodes,
85
+ int* dist_cache,
86
+ size_t* last_insert_len,
87
+ Command* commands,
88
+ size_t* num_literals);
89
+
90
+ /* Maximum distance, see section 9.1. of the spec. */
91
+ static BROTLI_INLINE size_t MaxBackwardLimit(int lgwin) {
92
+ return (1u << lgwin) - 16;
93
+ }
94
+
95
+ #if defined(__cplusplus) || defined(c_plusplus)
96
+ } /* extern "C" */
97
+ #endif
98
+
99
+ #endif /* BROTLI_ENC_BACKWARD_REFERENCES_H_ */
@@ -0,0 +1,147 @@
1
+ /* NOLINT(build/header_guard) */
2
+ /* Copyright 2013 Google Inc. All Rights Reserved.
3
+
4
+ Distributed under MIT license.
5
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6
+ */
7
+
8
+ /* template parameters: FN */
9
+
10
+ #define Hasher HASHER()
11
+
12
+ static BROTLI_NOINLINE void FN(CreateBackwardReferences)(
13
+ MemoryManager* m, size_t num_bytes, size_t position, BROTLI_BOOL is_last,
14
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
15
+ const BrotliEncoderParams* params, Hasher* hasher, int* dist_cache,
16
+ size_t* last_insert_len, Command* commands, size_t* num_commands,
17
+ size_t* num_literals) {
18
+ /* Set maximum distance, see section 9.1. of the spec. */
19
+ const size_t max_backward_limit = MaxBackwardLimit(params->lgwin);
20
+
21
+ const Command* const orig_commands = commands;
22
+ size_t insert_length = *last_insert_len;
23
+ const size_t pos_end = position + num_bytes;
24
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
25
+ position + num_bytes - FN(StoreLookahead)() + 1 : position;
26
+
27
+ /* For speed up heuristics for random data. */
28
+ const size_t random_heuristics_window_size =
29
+ LiteralSpreeLengthForSparseSearch(params);
30
+ size_t apply_random_heuristics = position + random_heuristics_window_size;
31
+
32
+ /* Minimum score to accept a backward reference. */
33
+ const score_t kMinScore = BROTLI_SCORE_BASE + 400;
34
+
35
+ FN(Init)(m, hasher, ringbuffer, params, position, num_bytes, is_last);
36
+ if (BROTLI_IS_OOM(m)) return;
37
+ FN(StitchToPreviousBlock)(hasher, num_bytes, position,
38
+ ringbuffer, ringbuffer_mask);
39
+
40
+ while (position + FN(HashTypeLength)() < pos_end) {
41
+ size_t max_length = pos_end - position;
42
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
43
+ HasherSearchResult sr;
44
+ sr.len = 0;
45
+ sr.len_x_code = 0;
46
+ sr.distance = 0;
47
+ sr.score = kMinScore;
48
+ if (FN(FindLongestMatch)(hasher, ringbuffer, ringbuffer_mask, dist_cache,
49
+ position, max_length, max_distance, &sr)) {
50
+ /* Found a match. Let's look for something even better ahead. */
51
+ int delayed_backward_references_in_row = 0;
52
+ --max_length;
53
+ for (;; --max_length) {
54
+ const score_t cost_diff_lazy = 700;
55
+ BROTLI_BOOL is_match_found;
56
+ HasherSearchResult sr2;
57
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
58
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0;
59
+ sr2.len_x_code = 0;
60
+ sr2.distance = 0;
61
+ sr2.score = kMinScore;
62
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
63
+ is_match_found = FN(FindLongestMatch)(hasher, ringbuffer,
64
+ ringbuffer_mask, dist_cache, position + 1, max_length, max_distance,
65
+ &sr2);
66
+ if (is_match_found && sr2.score >= sr.score + cost_diff_lazy) {
67
+ /* Ok, let's just write one byte for now and start a match from the
68
+ next byte. */
69
+ ++position;
70
+ ++insert_length;
71
+ sr = sr2;
72
+ if (++delayed_backward_references_in_row < 4 &&
73
+ position + FN(HashTypeLength)() < pos_end) {
74
+ continue;
75
+ }
76
+ }
77
+ break;
78
+ }
79
+ apply_random_heuristics =
80
+ position + 2 * sr.len + random_heuristics_window_size;
81
+ max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
82
+ {
83
+ /* The first 16 codes are special shortcodes,
84
+ and the minimum offset is 1. */
85
+ size_t distance_code =
86
+ ComputeDistanceCode(sr.distance, max_distance, dist_cache);
87
+ if (sr.distance <= max_distance && distance_code > 0) {
88
+ dist_cache[3] = dist_cache[2];
89
+ dist_cache[2] = dist_cache[1];
90
+ dist_cache[1] = dist_cache[0];
91
+ dist_cache[0] = (int)sr.distance;
92
+ }
93
+ InitCommand(commands++, insert_length, sr.len, sr.len ^ sr.len_x_code,
94
+ distance_code);
95
+ }
96
+ *num_literals += insert_length;
97
+ insert_length = 0;
98
+ /* Put the hash keys into the table, if there are enough bytes left.
99
+ Depending on the hasher implementation, it can push all positions
100
+ in the given range or only a subset of them. */
101
+ FN(StoreRange)(hasher, ringbuffer, ringbuffer_mask, position + 2,
102
+ BROTLI_MIN(size_t, position + sr.len, store_end));
103
+ position += sr.len;
104
+ } else {
105
+ ++insert_length;
106
+ ++position;
107
+ /* If we have not seen matches for a long time, we can skip some
108
+ match lookups. Unsuccessful match lookups are very very expensive
109
+ and this kind of a heuristic speeds up compression quite
110
+ a lot. */
111
+ if (position > apply_random_heuristics) {
112
+ /* Going through uncompressible data, jump. */
113
+ if (position >
114
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
115
+ /* It is quite a long time since we saw a copy, so we assume
116
+ that this data is not compressible, and store hashes less
117
+ often. Hashes of non compressible data are less likely to
118
+ turn out to be useful in the future, too, so we store less of
119
+ them so as not to flood out the hash table of good compressible
120
+ data. */
121
+ const size_t kMargin =
122
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
123
+ size_t pos_jump =
124
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
125
+ for (; position < pos_jump; position += 4) {
126
+ FN(Store)(hasher, ringbuffer, ringbuffer_mask, position);
127
+ insert_length += 4;
128
+ }
129
+ } else {
130
+ const size_t kMargin =
131
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
132
+ size_t pos_jump =
133
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
134
+ for (; position < pos_jump; position += 2) {
135
+ FN(Store)(hasher, ringbuffer, ringbuffer_mask, position);
136
+ insert_length += 2;
137
+ }
138
+ }
139
+ }
140
+ }
141
+ }
142
+ insert_length += pos_end - position;
143
+ *last_insert_len = insert_length;
144
+ *num_commands += (size_t)(commands - orig_commands);
145
+ }
146
+
147
+ #undef Hasher
@@ -0,0 +1,35 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Functions to estimate the bit cost of Huffman trees. */
8
+
9
+ #include "./bit_cost.h"
10
+
11
+ #include "../common/constants.h"
12
+ #include "../common/types.h"
13
+ #include "./fast_log.h"
14
+ #include "./histogram.h"
15
+ #include "./port.h"
16
+
17
+ #if defined(__cplusplus) || defined(c_plusplus)
18
+ extern "C" {
19
+ #endif
20
+
21
+ #define FN(X) X ## Literal
22
+ #include "./bit_cost_inc.h" /* NOLINT(build/include) */
23
+ #undef FN
24
+
25
+ #define FN(X) X ## Command
26
+ #include "./bit_cost_inc.h" /* NOLINT(build/include) */
27
+ #undef FN
28
+
29
+ #define FN(X) X ## Distance
30
+ #include "./bit_cost_inc.h" /* NOLINT(build/include) */
31
+ #undef FN
32
+
33
+ #if defined(__cplusplus) || defined(c_plusplus)
34
+ } /* extern "C" */
35
+ #endif
@@ -4,19 +4,22 @@
4
4
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
5
  */
6
6
 
7
- // Functions to estimate the bit cost of Huffman trees.
7
+ /* Functions to estimate the bit cost of Huffman trees. */
8
8
 
9
9
  #ifndef BROTLI_ENC_BIT_COST_H_
10
10
  #define BROTLI_ENC_BIT_COST_H_
11
11
 
12
- #include "./entropy_encode.h"
12
+ #include "../common/types.h"
13
13
  #include "./fast_log.h"
14
- #include "./types.h"
14
+ #include "./histogram.h"
15
+ #include "./port.h"
15
16
 
16
- namespace brotli {
17
+ #if defined(__cplusplus) || defined(c_plusplus)
18
+ extern "C" {
19
+ #endif
17
20
 
18
- static inline double ShannonEntropy(const uint32_t *population, size_t size,
19
- size_t *total) {
21
+ static BROTLI_INLINE double ShannonEntropy(const uint32_t *population,
22
+ size_t size, size_t *total) {
20
23
  size_t sum = 0;
21
24
  double retval = 0;
22
25
  const uint32_t *population_end = population + size;
@@ -27,135 +30,34 @@ static inline double ShannonEntropy(const uint32_t *population, size_t size,
27
30
  while (population < population_end) {
28
31
  p = *population++;
29
32
  sum += p;
30
- retval -= static_cast<double>(p) * FastLog2(p);
33
+ retval -= (double)p * FastLog2(p);
31
34
  odd_number_of_elements_left:
32
35
  p = *population++;
33
36
  sum += p;
34
- retval -= static_cast<double>(p) * FastLog2(p);
37
+ retval -= (double)p * FastLog2(p);
35
38
  }
36
- if (sum) retval += static_cast<double>(sum) * FastLog2(sum);
39
+ if (sum) retval += (double)sum * FastLog2(sum);
37
40
  *total = sum;
38
41
  return retval;
39
42
  }
40
43
 
41
- static inline double BitsEntropy(const uint32_t *population, size_t size) {
44
+ static BROTLI_INLINE double BitsEntropy(
45
+ const uint32_t *population, size_t size) {
42
46
  size_t sum;
43
47
  double retval = ShannonEntropy(population, size, &sum);
44
48
  if (retval < sum) {
45
- // At least one bit per literal is needed.
46
- retval = static_cast<double>(sum);
49
+ /* At least one bit per literal is needed. */
50
+ retval = (double)sum;
47
51
  }
48
52
  return retval;
49
53
  }
50
54
 
51
- template<int kSize>
52
- double PopulationCost(const Histogram<kSize>& histogram) {
53
- static const double kOneSymbolHistogramCost = 12;
54
- static const double kTwoSymbolHistogramCost = 20;
55
- static const double kThreeSymbolHistogramCost = 28;
56
- static const double kFourSymbolHistogramCost = 37;
57
- if (histogram.total_count_ == 0) {
58
- return kOneSymbolHistogramCost;
59
- }
60
- int count = 0;
61
- int s[5];
62
- for (int i = 0; i < kSize; ++i) {
63
- if (histogram.data_[i] > 0) {
64
- s[count] = i;
65
- ++count;
66
- if (count > 4) break;
67
- }
68
- }
69
- if (count == 1) {
70
- return kOneSymbolHistogramCost;
71
- }
72
- if (count == 2) {
73
- return (kTwoSymbolHistogramCost +
74
- static_cast<double>(histogram.total_count_));
75
- }
76
- if (count == 3) {
77
- const uint32_t histo0 = histogram.data_[s[0]];
78
- const uint32_t histo1 = histogram.data_[s[1]];
79
- const uint32_t histo2 = histogram.data_[s[2]];
80
- const uint32_t histomax = std::max(histo0, std::max(histo1, histo2));
81
- return (kThreeSymbolHistogramCost +
82
- 2 * (histo0 + histo1 + histo2) - histomax);
83
- }
84
- if (count == 4) {
85
- uint32_t histo[4];
86
- for (int i = 0; i < 4; ++i) {
87
- histo[i] = histogram.data_[s[i]];
88
- }
89
- // Sort
90
- for (int i = 0; i < 4; ++i) {
91
- for (int j = i + 1; j < 4; ++j) {
92
- if (histo[j] > histo[i]) {
93
- std::swap(histo[j], histo[i]);
94
- }
95
- }
96
- }
97
- const uint32_t h23 = histo[2] + histo[3];
98
- const uint32_t histomax = std::max(h23, histo[0]);
99
- return (kFourSymbolHistogramCost +
100
- 3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
101
- }
102
-
103
- // In this loop we compute the entropy of the histogram and simultaneously
104
- // build a simplified histogram of the code length codes where we use the
105
- // zero repeat code 17, but we don't use the non-zero repeat code 16.
106
- double bits = 0;
107
- size_t max_depth = 1;
108
- uint32_t depth_histo[kCodeLengthCodes] = { 0 };
109
- const double log2total = FastLog2(histogram.total_count_);
110
- for (size_t i = 0; i < kSize;) {
111
- if (histogram.data_[i] > 0) {
112
- // Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
113
- // = log2(total_count) - log2(count(symbol))
114
- double log2p = log2total - FastLog2(histogram.data_[i]);
115
- // Approximate the bit depth by round(-log2(P(symbol)))
116
- size_t depth = static_cast<size_t>(log2p + 0.5);
117
- bits += histogram.data_[i] * log2p;
118
- if (depth > 15) {
119
- depth = 15;
120
- }
121
- if (depth > max_depth) {
122
- max_depth = depth;
123
- }
124
- ++depth_histo[depth];
125
- ++i;
126
- } else {
127
- // Compute the run length of zeros and add the appropriate number of 0 and
128
- // 17 code length codes to the code length code histogram.
129
- uint32_t reps = 1;
130
- for (size_t k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
131
- ++reps;
132
- }
133
- i += reps;
134
- if (i == kSize) {
135
- // Don't add any cost for the last zero run, since these are encoded
136
- // only implicitly.
137
- break;
138
- }
139
- if (reps < 3) {
140
- depth_histo[0] += reps;
141
- } else {
142
- reps -= 2;
143
- while (reps > 0) {
144
- ++depth_histo[17];
145
- // Add the 3 extra bits for the 17 code length code.
146
- bits += 3;
147
- reps >>= 3;
148
- }
149
- }
150
- }
151
- }
152
- // Add the estimated encoding cost of the code length code histogram.
153
- bits += static_cast<double>(18 + 2 * max_depth);
154
- // Add the entropy of the code length code histogram.
155
- bits += BitsEntropy(depth_histo, kCodeLengthCodes);
156
- return bits;
157
- }
55
+ BROTLI_INTERNAL double BrotliPopulationCostLiteral(const HistogramLiteral*);
56
+ BROTLI_INTERNAL double BrotliPopulationCostCommand(const HistogramCommand*);
57
+ BROTLI_INTERNAL double BrotliPopulationCostDistance(const HistogramDistance*);
158
58
 
159
- } // namespace brotli
59
+ #if defined(__cplusplus) || defined(c_plusplus)
60
+ } /* extern "C" */
61
+ #endif
160
62
 
161
- #endif // BROTLI_ENC_BIT_COST_H_
63
+ #endif /* BROTLI_ENC_BIT_COST_H_ */