isomorfeus-ferret 0.12.6 → 0.12.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -4
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
- data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
- data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
- data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
- data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
- data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
- data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
- data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
- data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +88 -3
@@ -0,0 +1,84 @@
|
|
1
|
+
/* Copyright 2014 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Functions to convert brotli-related data structures into the
|
8
|
+
brotli bit stream. The functions here operate under
|
9
|
+
assumption that there is enough space in the storage, i.e., there are
|
10
|
+
no out-of-range checks anywhere.
|
11
|
+
|
12
|
+
These functions do bit addressing into a byte array. The byte array
|
13
|
+
is called "storage" and the index to the bit is called storage_ix
|
14
|
+
in function arguments. */
|
15
|
+
|
16
|
+
#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
|
17
|
+
#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
|
18
|
+
|
19
|
+
#include "brotli_common_context.h"
|
20
|
+
#include "brotli_common_platform.h"
|
21
|
+
#include "brotli_types.h"
|
22
|
+
#include "brotli_enc_command.h"
|
23
|
+
#include "brotli_enc_entropy_encode.h"
|
24
|
+
#include "brotli_enc_memory.h"
|
25
|
+
#include "brotli_enc_metablock.h"
|
26
|
+
|
27
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
28
|
+
extern "C" {
|
29
|
+
#endif
|
30
|
+
|
31
|
+
/* All Store functions here will use a storage_ix, which is always the bit
|
32
|
+
position for the current storage. */
|
33
|
+
|
34
|
+
BROTLI_INTERNAL void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
|
35
|
+
HuffmanTree* tree, size_t* storage_ix, uint8_t* storage);
|
36
|
+
|
37
|
+
BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast(
|
38
|
+
MemoryManager* m, const uint32_t* histogram, const size_t histogram_total,
|
39
|
+
const size_t max_bits, uint8_t* depth, uint16_t* bits, size_t* storage_ix,
|
40
|
+
uint8_t* storage);
|
41
|
+
|
42
|
+
/* REQUIRES: length > 0 */
|
43
|
+
/* REQUIRES: length <= (1 << 24) */
|
44
|
+
BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m,
|
45
|
+
const uint8_t* input, size_t start_pos, size_t length, size_t mask,
|
46
|
+
uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last,
|
47
|
+
const BrotliEncoderParams* params, ContextType literal_context_mode,
|
48
|
+
const Command* commands, size_t n_commands, const MetaBlockSplit* mb,
|
49
|
+
size_t* storage_ix, uint8_t* storage);
|
50
|
+
|
51
|
+
/* Stores the meta-block without doing any block splitting, just collects
|
52
|
+
one histogram per block category and uses that for entropy coding.
|
53
|
+
REQUIRES: length > 0
|
54
|
+
REQUIRES: length <= (1 << 24) */
|
55
|
+
BROTLI_INTERNAL void BrotliStoreMetaBlockTrivial(MemoryManager* m,
|
56
|
+
const uint8_t* input, size_t start_pos, size_t length, size_t mask,
|
57
|
+
BROTLI_BOOL is_last, const BrotliEncoderParams* params,
|
58
|
+
const Command* commands, size_t n_commands,
|
59
|
+
size_t* storage_ix, uint8_t* storage);
|
60
|
+
|
61
|
+
/* Same as above, but uses static prefix codes for histograms with only a few
|
62
|
+
symbols, and uses static code length prefix codes for all other histograms.
|
63
|
+
REQUIRES: length > 0
|
64
|
+
REQUIRES: length <= (1 << 24) */
|
65
|
+
BROTLI_INTERNAL void BrotliStoreMetaBlockFast(MemoryManager* m,
|
66
|
+
const uint8_t* input, size_t start_pos, size_t length, size_t mask,
|
67
|
+
BROTLI_BOOL is_last, const BrotliEncoderParams* params,
|
68
|
+
const Command* commands, size_t n_commands,
|
69
|
+
size_t* storage_ix, uint8_t* storage);
|
70
|
+
|
71
|
+
/* This is for storing uncompressed blocks (simple raw storage of
|
72
|
+
bytes-as-bytes).
|
73
|
+
REQUIRES: length > 0
|
74
|
+
REQUIRES: length <= (1 << 24) */
|
75
|
+
BROTLI_INTERNAL void BrotliStoreUncompressedMetaBlock(
|
76
|
+
BROTLI_BOOL is_final_block, const uint8_t* BROTLI_RESTRICT input,
|
77
|
+
size_t position, size_t mask, size_t len,
|
78
|
+
size_t* BROTLI_RESTRICT storage_ix, uint8_t* BROTLI_RESTRICT storage);
|
79
|
+
|
80
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
81
|
+
} /* extern "C" */
|
82
|
+
#endif
|
83
|
+
|
84
|
+
#endif /* BROTLI_ENC_BROTLI_BIT_STREAM_H_ */
|
@@ -0,0 +1,56 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Functions for clustering similar histograms together. */
|
8
|
+
|
9
|
+
#include "brotli_enc_cluster.h"
|
10
|
+
|
11
|
+
#include "brotli_common_platform.h"
|
12
|
+
#include "brotli_types.h"
|
13
|
+
#include "brotli_enc_bit_cost.h" /* BrotliPopulationCost */
|
14
|
+
#include "brotli_enc_fast_log.h"
|
15
|
+
#include "brotli_enc_histogram.h"
|
16
|
+
#include "brotli_enc_memory.h"
|
17
|
+
|
18
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
19
|
+
extern "C" {
|
20
|
+
#endif
|
21
|
+
|
22
|
+
/* Ordering predicate for HistogramPair queue entries.
   Returns BROTLI_TRUE when p1 has the larger cost_diff; when both cost_diff
   values compare equal, returns BROTLI_TRUE when p1 has the larger index gap
   (idx2 - idx1). Returns BROTLI_FALSE otherwise. */
static BROTLI_INLINE BROTLI_BOOL HistogramPairIsLess(
    const HistogramPair* p1, const HistogramPair* p2) {
  if (p1->cost_diff == p2->cost_diff) {
    /* Tie on cost: fall back to comparing the distance between indices. */
    return TO_BROTLI_BOOL((p1->idx2 - p1->idx1) > (p2->idx2 - p2->idx1));
  }
  return TO_BROTLI_BOOL(p1->cost_diff > p2->cost_diff);
}
|
29
|
+
|
30
|
+
/* Returns entropy reduction of the context map when we combine two clusters. */
|
31
|
+
static BROTLI_INLINE double ClusterCostDiff(size_t size_a, size_t size_b) {
|
32
|
+
size_t size_c = size_a + size_b;
|
33
|
+
return (double)size_a * FastLog2(size_a) +
|
34
|
+
(double)size_b * FastLog2(size_b) -
|
35
|
+
(double)size_c * FastLog2(size_c);
|
36
|
+
}
|
37
|
+
|
38
|
+
#define CODE(X) X
|
39
|
+
|
40
|
+
#define FN(X) X ## Literal
|
41
|
+
#include "brotli_enc_cluster_inc.h" /* NOLINT(build/include) */
|
42
|
+
#undef FN
|
43
|
+
|
44
|
+
#define FN(X) X ## Command
|
45
|
+
#include "brotli_enc_cluster_inc.h" /* NOLINT(build/include) */
|
46
|
+
#undef FN
|
47
|
+
|
48
|
+
#define FN(X) X ## Distance
|
49
|
+
#include "brotli_enc_cluster_inc.h" /* NOLINT(build/include) */
|
50
|
+
#undef FN
|
51
|
+
|
52
|
+
#undef CODE
|
53
|
+
|
54
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
55
|
+
} /* extern "C" */
|
56
|
+
#endif
|
@@ -0,0 +1,48 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Functions for clustering similar histograms together. */
|
8
|
+
|
9
|
+
#ifndef BROTLI_ENC_CLUSTER_H_
|
10
|
+
#define BROTLI_ENC_CLUSTER_H_
|
11
|
+
|
12
|
+
#include "brotli_common_platform.h"
|
13
|
+
#include "brotli_types.h"
|
14
|
+
#include "brotli_enc_histogram.h"
|
15
|
+
#include "brotli_enc_memory.h"
|
16
|
+
|
17
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
18
|
+
extern "C" {
|
19
|
+
#endif
|
20
|
+
|
21
|
+
/* One candidate merge of two histograms, as stored in the clustering queue. */
struct HistogramPair {
  uint32_t idx1;      /* First histogram index; the clustering code stores the
                         smaller of the two indices here. */
  uint32_t idx2;      /* Second histogram index; the larger of the two. */
  double cost_combo;  /* Bit cost recorded for the combined histogram. */
  double cost_diff;   /* Cost change of the merge; the clustering code merges
                         the pair with the smallest value first. */
};
typedef struct HistogramPair HistogramPair;
|
27
|
+
|
28
|
+
#define CODE(X) /* Declaration */;
|
29
|
+
|
30
|
+
#define FN(X) X ## Literal
|
31
|
+
#include "brotli_enc_cluster_inc.h" /* NOLINT(build/include) */
|
32
|
+
#undef FN
|
33
|
+
|
34
|
+
#define FN(X) X ## Command
|
35
|
+
#include "brotli_enc_cluster_inc.h" /* NOLINT(build/include) */
|
36
|
+
#undef FN
|
37
|
+
|
38
|
+
#define FN(X) X ## Distance
|
39
|
+
#include "brotli_enc_cluster_inc.h" /* NOLINT(build/include) */
|
40
|
+
#undef FN
|
41
|
+
|
42
|
+
#undef CODE
|
43
|
+
|
44
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
45
|
+
} /* extern "C" */
|
46
|
+
#endif
|
47
|
+
|
48
|
+
#endif /* BROTLI_ENC_CLUSTER_H_ */
|
@@ -0,0 +1,320 @@
|
|
1
|
+
/* NOLINT(build/header_guard) */
|
2
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
3
|
+
|
4
|
+
Distributed under MIT license.
|
5
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
6
|
+
*/
|
7
|
+
|
8
|
+
/* template parameters: FN, CODE */
|
9
|
+
|
10
|
+
#define HistogramType FN(Histogram)
|
11
|
+
|
12
|
+
/* Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
|
13
|
+
it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue. */
|
14
|
+
BROTLI_INTERNAL void FN(BrotliCompareAndPushToQueue)(
|
15
|
+
const HistogramType* out, const uint32_t* cluster_size, uint32_t idx1,
|
16
|
+
uint32_t idx2, size_t max_num_pairs, HistogramPair* pairs,
|
17
|
+
size_t* num_pairs) CODE({
|
18
|
+
BROTLI_BOOL is_good_pair = BROTLI_FALSE;
|
19
|
+
HistogramPair p;
|
20
|
+
p.idx1 = p.idx2 = 0;
|
21
|
+
p.cost_diff = p.cost_combo = 0;
|
22
|
+
if (idx1 == idx2) {
|
23
|
+
return;
|
24
|
+
}
|
25
|
+
if (idx2 < idx1) {
|
26
|
+
uint32_t t = idx2;
|
27
|
+
idx2 = idx1;
|
28
|
+
idx1 = t;
|
29
|
+
}
|
30
|
+
p.idx1 = idx1;
|
31
|
+
p.idx2 = idx2;
|
32
|
+
p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
|
33
|
+
p.cost_diff -= out[idx1].bit_cost_;
|
34
|
+
p.cost_diff -= out[idx2].bit_cost_;
|
35
|
+
|
36
|
+
if (out[idx1].total_count_ == 0) {
|
37
|
+
p.cost_combo = out[idx2].bit_cost_;
|
38
|
+
is_good_pair = BROTLI_TRUE;
|
39
|
+
} else if (out[idx2].total_count_ == 0) {
|
40
|
+
p.cost_combo = out[idx1].bit_cost_;
|
41
|
+
is_good_pair = BROTLI_TRUE;
|
42
|
+
} else {
|
43
|
+
double threshold = *num_pairs == 0 ? 1e99 :
|
44
|
+
BROTLI_MAX(double, 0.0, pairs[0].cost_diff);
|
45
|
+
HistogramType combo = out[idx1];
|
46
|
+
double cost_combo;
|
47
|
+
FN(HistogramAddHistogram)(&combo, &out[idx2]);
|
48
|
+
cost_combo = FN(BrotliPopulationCost)(&combo);
|
49
|
+
if (cost_combo < threshold - p.cost_diff) {
|
50
|
+
p.cost_combo = cost_combo;
|
51
|
+
is_good_pair = BROTLI_TRUE;
|
52
|
+
}
|
53
|
+
}
|
54
|
+
if (is_good_pair) {
|
55
|
+
p.cost_diff += p.cost_combo;
|
56
|
+
if (*num_pairs > 0 && HistogramPairIsLess(&pairs[0], &p)) {
|
57
|
+
/* Replace the top of the queue if needed. */
|
58
|
+
if (*num_pairs < max_num_pairs) {
|
59
|
+
pairs[*num_pairs] = pairs[0];
|
60
|
+
++(*num_pairs);
|
61
|
+
}
|
62
|
+
pairs[0] = p;
|
63
|
+
} else if (*num_pairs < max_num_pairs) {
|
64
|
+
pairs[*num_pairs] = p;
|
65
|
+
++(*num_pairs);
|
66
|
+
}
|
67
|
+
}
|
68
|
+
})
|
69
|
+
|
70
|
+
/* Greedily merges the histograms listed in clusters[0..num_clusters).

   All pairs of listed clusters are first offered to the pairs queue. Then,
   while more than the current minimum number of clusters remain, the pair at
   the top of the queue is merged: out[idx2] is added into out[idx1],
   cluster_size is accumulated, every symbols[i] equal to idx2 is rewritten to
   idx1, and idx2 is removed from clusters[]. Merging first proceeds only for
   pairs with negative cost_diff; once the top pair is no longer negative the
   threshold is lifted and merging continues until at most max_clusters
   remain.

   Returns the number of clusters left in clusters[].

   NOTE: the body is passed as a single argument to the CODE() macro, so it
   must not contain commas outside of parentheses. */
BROTLI_INTERNAL size_t FN(BrotliHistogramCombine)(HistogramType* out,
                                                  uint32_t* cluster_size,
                                                  uint32_t* symbols,
                                                  uint32_t* clusters,
                                                  HistogramPair* pairs,
                                                  size_t num_clusters,
                                                  size_t symbols_size,
                                                  size_t max_clusters,
                                                  size_t max_num_pairs) CODE({
  double merge_threshold = 0.0;
  size_t cluster_floor = 1;
  size_t queue_len = 0;
  size_t first;

  /* We maintain a vector of histogram pairs, with the property that the pair
     with the maximum bit cost reduction is the first. Seed it with every
     unordered pair of clusters. */
  for (first = 0; first < num_clusters; ++first) {
    size_t second;
    for (second = first + 1; second < num_clusters; ++second) {
      FN(BrotliCompareAndPushToQueue)(out, cluster_size, clusters[first],
          clusters[second], max_num_pairs, &pairs[0], &queue_len);
    }
  }

  while (num_clusters > cluster_floor) {
    uint32_t keep;
    uint32_t drop;
    size_t k;
    size_t kept_pairs;

    if (pairs[0].cost_diff >= merge_threshold) {
      /* No more beneficial merges: from now on merge unconditionally, but
         only until the cluster count is down to max_clusters. */
      merge_threshold = 1e99;
      cluster_floor = max_clusters;
      continue;
    }

    /* Take the best pair from the top of heap. */
    keep = pairs[0].idx1;
    drop = pairs[0].idx2;
    FN(HistogramAddHistogram)(&out[keep], &out[drop]);
    out[keep].bit_cost_ = pairs[0].cost_combo;
    cluster_size[keep] += cluster_size[drop];

    /* Redirect every symbol that pointed at the absorbed cluster. */
    for (k = 0; k < symbols_size; ++k) {
      if (symbols[k] == drop) symbols[k] = keep;
    }

    /* Delete the absorbed cluster from the list, closing the gap. */
    for (k = 0; k < num_clusters; ++k) {
      if (clusters[k] != drop) continue;
      memmove(&clusters[k], &clusters[k + 1],
              (num_clusters - k - 1) * sizeof(clusters[0]));
      break;
    }
    --num_clusters;

    /* Remove pairs intersecting the just combined best pair, compacting the
       queue in place and keeping the best surviving pair at pairs[0]. */
    kept_pairs = 0;
    for (k = 0; k < queue_len; ++k) {
      HistogramPair* entry = &pairs[k];
      if (entry->idx1 != keep && entry->idx2 != keep &&
          entry->idx1 != drop && entry->idx2 != drop) {
        if (HistogramPairIsLess(&pairs[0], entry)) {
          /* Replace the top of the queue if needed. */
          HistogramPair old_top = pairs[0];
          pairs[0] = *entry;
          pairs[kept_pairs] = old_top;
        } else {
          pairs[kept_pairs] = *entry;
        }
        ++kept_pairs;
      }
    }
    queue_len = kept_pairs;

    /* Push new pairs formed with the combined histogram to the heap. */
    for (k = 0; k < num_clusters; ++k) {
      FN(BrotliCompareAndPushToQueue)(out, cluster_size, keep, clusters[k],
          max_num_pairs, &pairs[0], &queue_len);
    }
  }
  return num_clusters;
})
|
155
|
+
|
156
|
+
/* What is the bit cost of moving histogram from cur_symbol to candidate. */
|
157
|
+
BROTLI_INTERNAL double FN(BrotliHistogramBitCostDistance)(
|
158
|
+
const HistogramType* histogram, const HistogramType* candidate) CODE({
|
159
|
+
if (histogram->total_count_ == 0) {
|
160
|
+
return 0.0;
|
161
|
+
} else {
|
162
|
+
HistogramType tmp = *histogram;
|
163
|
+
FN(HistogramAddHistogram)(&tmp, candidate);
|
164
|
+
return FN(BrotliPopulationCost)(&tmp) - candidate->bit_cost_;
|
165
|
+
}
|
166
|
+
})
|
167
|
+
|
168
|
+
/* Find the best 'out' histogram for each of the 'in' histograms.
|
169
|
+
When called, clusters[0..num_clusters) contains the unique values from
|
170
|
+
symbols[0..in_size), but this property is not preserved in this function.
|
171
|
+
Note: we assume that out[]->bit_cost_ is already up-to-date. */
|
172
|
+
BROTLI_INTERNAL void FN(BrotliHistogramRemap)(const HistogramType* in,
|
173
|
+
size_t in_size, const uint32_t* clusters, size_t num_clusters,
|
174
|
+
HistogramType* out, uint32_t* symbols) CODE({
|
175
|
+
size_t i;
|
176
|
+
for (i = 0; i < in_size; ++i) {
|
177
|
+
uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];
|
178
|
+
double best_bits =
|
179
|
+
FN(BrotliHistogramBitCostDistance)(&in[i], &out[best_out]);
|
180
|
+
size_t j;
|
181
|
+
for (j = 0; j < num_clusters; ++j) {
|
182
|
+
const double cur_bits =
|
183
|
+
FN(BrotliHistogramBitCostDistance)(&in[i], &out[clusters[j]]);
|
184
|
+
if (cur_bits < best_bits) {
|
185
|
+
best_bits = cur_bits;
|
186
|
+
best_out = clusters[j];
|
187
|
+
}
|
188
|
+
}
|
189
|
+
symbols[i] = best_out;
|
190
|
+
}
|
191
|
+
|
192
|
+
/* Recompute each out based on raw and symbols. */
|
193
|
+
for (i = 0; i < num_clusters; ++i) {
|
194
|
+
FN(HistogramClear)(&out[clusters[i]]);
|
195
|
+
}
|
196
|
+
for (i = 0; i < in_size; ++i) {
|
197
|
+
FN(HistogramAddHistogram)(&out[symbols[i]], &in[i]);
|
198
|
+
}
|
199
|
+
})
|
200
|
+
|
201
|
+
/* Reorders elements of the out[0..length) array and changes values in
|
202
|
+
symbols[0..length) array in the following way:
|
203
|
+
* when called, symbols[] contains indexes into out[], and has N unique
|
204
|
+
values (possibly N < length)
|
205
|
+
* on return, symbols'[i] = f(symbols[i]) and
|
206
|
+
out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
|
207
|
+
where f is a bijection between the range of symbols[] and [0..N), and
|
208
|
+
the first occurrences of values in symbols'[i] come in consecutive
|
209
|
+
increasing order.
|
210
|
+
Returns N, the number of unique values in symbols[]. */
|
211
|
+
BROTLI_INTERNAL size_t FN(BrotliHistogramReindex)(MemoryManager* m,
|
212
|
+
HistogramType* out, uint32_t* symbols, size_t length) CODE({
|
213
|
+
static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
|
214
|
+
uint32_t* new_index = BROTLI_ALLOC(m, uint32_t, length);
|
215
|
+
uint32_t next_index;
|
216
|
+
HistogramType* tmp;
|
217
|
+
size_t i;
|
218
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return 0;
|
219
|
+
for (i = 0; i < length; ++i) {
|
220
|
+
new_index[i] = kInvalidIndex;
|
221
|
+
}
|
222
|
+
next_index = 0;
|
223
|
+
for (i = 0; i < length; ++i) {
|
224
|
+
if (new_index[symbols[i]] == kInvalidIndex) {
|
225
|
+
new_index[symbols[i]] = next_index;
|
226
|
+
++next_index;
|
227
|
+
}
|
228
|
+
}
|
229
|
+
/* TODO: by using idea of "cycle-sort" we can avoid allocation of
|
230
|
+
tmp and reduce the number of copying by the factor of 2. */
|
231
|
+
tmp = BROTLI_ALLOC(m, HistogramType, next_index);
|
232
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tmp)) return 0;
|
233
|
+
next_index = 0;
|
234
|
+
for (i = 0; i < length; ++i) {
|
235
|
+
if (new_index[symbols[i]] == next_index) {
|
236
|
+
tmp[next_index] = out[symbols[i]];
|
237
|
+
++next_index;
|
238
|
+
}
|
239
|
+
symbols[i] = new_index[symbols[i]];
|
240
|
+
}
|
241
|
+
BROTLI_FREE(m, new_index);
|
242
|
+
for (i = 0; i < next_index; ++i) {
|
243
|
+
out[i] = tmp[i];
|
244
|
+
}
|
245
|
+
BROTLI_FREE(m, tmp);
|
246
|
+
return next_index;
|
247
|
+
})
|
248
|
+
|
249
|
+
BROTLI_INTERNAL void FN(BrotliClusterHistograms)(
|
250
|
+
MemoryManager* m, const HistogramType* in, const size_t in_size,
|
251
|
+
size_t max_histograms, HistogramType* out, size_t* out_size,
|
252
|
+
uint32_t* histogram_symbols) CODE({
|
253
|
+
uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, in_size);
|
254
|
+
uint32_t* clusters = BROTLI_ALLOC(m, uint32_t, in_size);
|
255
|
+
size_t num_clusters = 0;
|
256
|
+
const size_t max_input_histograms = 64;
|
257
|
+
size_t pairs_capacity = max_input_histograms * max_input_histograms / 2;
|
258
|
+
/* For the first pass of clustering, we allow all pairs. */
|
259
|
+
HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity + 1);
|
260
|
+
size_t i;
|
261
|
+
|
262
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(cluster_size) ||
|
263
|
+
BROTLI_IS_NULL(clusters) || BROTLI_IS_NULL(pairs)) {
|
264
|
+
return;
|
265
|
+
}
|
266
|
+
|
267
|
+
for (i = 0; i < in_size; ++i) {
|
268
|
+
cluster_size[i] = 1;
|
269
|
+
}
|
270
|
+
|
271
|
+
for (i = 0; i < in_size; ++i) {
|
272
|
+
out[i] = in[i];
|
273
|
+
out[i].bit_cost_ = FN(BrotliPopulationCost)(&in[i]);
|
274
|
+
histogram_symbols[i] = (uint32_t)i;
|
275
|
+
}
|
276
|
+
|
277
|
+
for (i = 0; i < in_size; i += max_input_histograms) {
|
278
|
+
size_t num_to_combine =
|
279
|
+
BROTLI_MIN(size_t, in_size - i, max_input_histograms);
|
280
|
+
size_t num_new_clusters;
|
281
|
+
size_t j;
|
282
|
+
for (j = 0; j < num_to_combine; ++j) {
|
283
|
+
clusters[num_clusters + j] = (uint32_t)(i + j);
|
284
|
+
}
|
285
|
+
num_new_clusters =
|
286
|
+
FN(BrotliHistogramCombine)(out, cluster_size,
|
287
|
+
&histogram_symbols[i],
|
288
|
+
&clusters[num_clusters], pairs,
|
289
|
+
num_to_combine, num_to_combine,
|
290
|
+
max_histograms, pairs_capacity);
|
291
|
+
num_clusters += num_new_clusters;
|
292
|
+
}
|
293
|
+
|
294
|
+
{
|
295
|
+
/* For the second pass, we limit the total number of histogram pairs.
|
296
|
+
After this limit is reached, we only keep searching for the best pair. */
|
297
|
+
size_t max_num_pairs = BROTLI_MIN(size_t,
|
298
|
+
64 * num_clusters, (num_clusters / 2) * num_clusters);
|
299
|
+
BROTLI_ENSURE_CAPACITY(
|
300
|
+
m, HistogramPair, pairs, pairs_capacity, max_num_pairs + 1);
|
301
|
+
if (BROTLI_IS_OOM(m)) return;
|
302
|
+
|
303
|
+
/* Collapse similar histograms. */
|
304
|
+
num_clusters = FN(BrotliHistogramCombine)(out, cluster_size,
|
305
|
+
histogram_symbols, clusters,
|
306
|
+
pairs, num_clusters, in_size,
|
307
|
+
max_histograms, max_num_pairs);
|
308
|
+
}
|
309
|
+
BROTLI_FREE(m, pairs);
|
310
|
+
BROTLI_FREE(m, cluster_size);
|
311
|
+
/* Find the optimal map from original histograms to the final ones. */
|
312
|
+
FN(BrotliHistogramRemap)(in, in_size, clusters, num_clusters,
|
313
|
+
out, histogram_symbols);
|
314
|
+
BROTLI_FREE(m, clusters);
|
315
|
+
/* Convert the context map to a canonical form. */
|
316
|
+
*out_size = FN(BrotliHistogramReindex)(m, out, histogram_symbols, in_size);
|
317
|
+
if (BROTLI_IS_OOM(m)) return;
|
318
|
+
})
|
319
|
+
|
320
|
+
#undef HistogramType
|
@@ -0,0 +1,28 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
#include "brotli_enc_command.h"
|
8
|
+
|
9
|
+
#include "brotli_types.h"
|
10
|
+
|
11
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
12
|
+
extern "C" {
|
13
|
+
#endif
|
14
|
+
|
15
|
+
/* Lookup tables indexed by insert/copy length code. In both table pairs,
   for every code i that has a successor,
   Base[i + 1] == Base[i] + (1 << Extra[i]).
   NOTE(review): presumably Base is the smallest length covered by a code and
   Extra is the number of extra bits that follow it - confirm against the
   users of these tables. */

const uint32_t kBrotliInsBase[BROTLI_NUM_INS_COPY_CODES] = {
    /*  0.. 7 */ 0, 1, 2, 3, 4, 5, 6, 8,
    /*  8..15 */ 10, 14, 18, 26, 34, 50, 66, 98,
    /* 16..23 */ 130, 194, 322, 578, 1090, 2114, 6210, 22594};

const uint32_t kBrotliInsExtra[BROTLI_NUM_INS_COPY_CODES] = {
    /*  0.. 7 */ 0, 0, 0, 0, 0, 0, 1, 1,
    /*  8..15 */ 2, 2, 3, 3, 4, 4, 5, 5,
    /* 16..23 */ 6, 7, 8, 9, 10, 12, 14, 24};

const uint32_t kBrotliCopyBase[BROTLI_NUM_INS_COPY_CODES] = {
    /*  0.. 7 */ 2, 3, 4, 5, 6, 7, 8, 9,
    /*  8..15 */ 10, 12, 14, 18, 22, 30, 38, 54,
    /* 16..23 */ 70, 102, 134, 198, 326, 582, 1094, 2118};

const uint32_t kBrotliCopyExtra[BROTLI_NUM_INS_COPY_CODES] = {
    /*  0.. 7 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /*  8..15 */ 1, 1, 2, 2, 3, 3, 4, 4,
    /* 16..23 */ 5, 5, 6, 7, 8, 9, 10, 24};
|
25
|
+
|
26
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
27
|
+
} /* extern "C" */
|
28
|
+
#endif
|