isomorfeus-ferret 0.12.4 → 0.12.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +612 -612
- data/README.md +77 -48
- data/ext/isomorfeus_ferret_ext/bm_hash.c +9 -6
- data/ext/isomorfeus_ferret_ext/bm_micro_string.c +4 -2
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
- data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
- data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
- data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
- data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
- data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
- data/ext/isomorfeus_ferret_ext/frb_store.c +34 -5
- data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
- data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
- data/ext/isomorfeus_ferret_ext/frt_posh.h +11 -19
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1844 -1911
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +12 -15
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_search.h +1 -1
- data/ext/isomorfeus_ferret_ext/libstemmer.c +14 -11
- data/ext/isomorfeus_ferret_ext/libstemmer.h +4 -9
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +1167 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +1433 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +120 -143
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +1 -2
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +217 -237
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +377 -432
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +298 -342
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +1 -2
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +530 -524
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +201 -214
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +394 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +457 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +396 -439
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +104 -128
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +242 -273
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +406 -461
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +1 -2
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +405 -456
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +108 -126
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +849 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +373 -405
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +288 -305
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.c +1651 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.c +546 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.c +1171 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.c +1436 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +121 -141
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +221 -241
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +381 -431
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +300 -345
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +518 -511
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +201 -209
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.c +3660 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.c +309 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +306 -671
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.c +394 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.c +457 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +400 -442
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.c +824 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.c +408 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +105 -127
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +245 -276
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +409 -464
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +376 -408
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +272 -287
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.c +6530 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +407 -458
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +110 -125
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.c +1865 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +698 -806
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.c +1220 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_api.c +1 -9
- data/ext/isomorfeus_ferret_ext/stem_api.h +1 -3
- data/ext/isomorfeus_ferret_ext/stem_header.h +30 -26
- data/ext/isomorfeus_ferret_ext/stem_modules.h +113 -26
- data/ext/isomorfeus_ferret_ext/stem_modules.txt +18 -5
- data/ext/isomorfeus_ferret_ext/stem_utilities.c +167 -132
- data/ext/isomorfeus_ferret_ext/test.c +7 -1
- data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
- data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
- data/ext/isomorfeus_ferret_ext/test_search.c +0 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +125 -5
- data/ext/isomorfeus_ferret_ext/q_parser.y +0 -1366
@@ -0,0 +1,843 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Function to find backward reference copies. */
|
8
|
+
|
9
|
+
#include "brotli_enc_backward_references_hq.h"
|
10
|
+
|
11
|
+
#include <string.h> /* memcpy, memset */
|
12
|
+
|
13
|
+
#include "brotli_common_constants.h"
|
14
|
+
#include "brotli_common_context.h"
|
15
|
+
#include "brotli_common_platform.h"
|
16
|
+
#include "brotli_types.h"
|
17
|
+
#include "brotli_enc_command.h"
|
18
|
+
#include "brotli_enc_fast_log.h"
|
19
|
+
#include "brotli_enc_find_match_length.h"
|
20
|
+
#include "brotli_enc_literal_cost.h"
|
21
|
+
#include "brotli_enc_memory.h"
|
22
|
+
#include "brotli_enc_params.h"
|
23
|
+
#include "brotli_enc_prefix.h"
|
24
|
+
#include "brotli_enc_quality.h"
|
25
|
+
|
26
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
27
|
+
extern "C" {
|
28
|
+
#endif
|
29
|
+
|
30
|
+
/* BrotliCalculateDistanceCodeLimit(BROTLI_MAX_ALLOWED_DISTANCE, 3, 120). */
|
31
|
+
#define BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE 544
|
32
|
+
|
33
|
+
static const float kInfinity = 1.7e38f; /* ~= 2 ^ 127 */
|
34
|
+
|
35
|
+
static const uint32_t kDistanceCacheIndex[] = {
|
36
|
+
0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
|
37
|
+
};
|
38
|
+
static const int kDistanceCacheOffset[] = {
|
39
|
+
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
|
40
|
+
};
|
41
|
+
|
42
|
+
void BrotliInitZopfliNodes(ZopfliNode* array, size_t length) {
|
43
|
+
ZopfliNode stub;
|
44
|
+
size_t i;
|
45
|
+
stub.length = 1;
|
46
|
+
stub.distance = 0;
|
47
|
+
stub.dcode_insert_length = 0;
|
48
|
+
stub.u.cost = kInfinity;
|
49
|
+
for (i = 0; i < length; ++i) array[i] = stub;
|
50
|
+
}
|
51
|
+
|
52
|
+
static BROTLI_INLINE uint32_t ZopfliNodeCopyLength(const ZopfliNode* self) {
|
53
|
+
return self->length & 0x1FFFFFF;
|
54
|
+
}
|
55
|
+
|
56
|
+
static BROTLI_INLINE uint32_t ZopfliNodeLengthCode(const ZopfliNode* self) {
|
57
|
+
const uint32_t modifier = self->length >> 25;
|
58
|
+
return ZopfliNodeCopyLength(self) + 9u - modifier;
|
59
|
+
}
|
60
|
+
|
61
|
+
static BROTLI_INLINE uint32_t ZopfliNodeCopyDistance(const ZopfliNode* self) {
|
62
|
+
return self->distance;
|
63
|
+
}
|
64
|
+
|
65
|
+
static BROTLI_INLINE uint32_t ZopfliNodeDistanceCode(const ZopfliNode* self) {
|
66
|
+
const uint32_t short_code = self->dcode_insert_length >> 27;
|
67
|
+
return short_code == 0 ?
|
68
|
+
ZopfliNodeCopyDistance(self) + BROTLI_NUM_DISTANCE_SHORT_CODES - 1 :
|
69
|
+
short_code - 1;
|
70
|
+
}
|
71
|
+
|
72
|
+
static BROTLI_INLINE uint32_t ZopfliNodeCommandLength(const ZopfliNode* self) {
|
73
|
+
return ZopfliNodeCopyLength(self) + (self->dcode_insert_length & 0x7FFFFFF);
|
74
|
+
}
|
75
|
+
|
76
|
+
/* Histogram based cost model for zopflification. */
|
77
|
+
typedef struct ZopfliCostModel {
|
78
|
+
/* The insert and copy length symbols. */
|
79
|
+
float cost_cmd_[BROTLI_NUM_COMMAND_SYMBOLS];
|
80
|
+
float* cost_dist_;
|
81
|
+
uint32_t distance_histogram_size;
|
82
|
+
/* Cumulative costs of literals per position in the stream. */
|
83
|
+
float* literal_costs_;
|
84
|
+
float min_cost_cmd_;
|
85
|
+
size_t num_bytes_;
|
86
|
+
} ZopfliCostModel;
|
87
|
+
|
88
|
+
static void InitZopfliCostModel(
|
89
|
+
MemoryManager* m, ZopfliCostModel* self, const BrotliDistanceParams* dist,
|
90
|
+
size_t num_bytes) {
|
91
|
+
self->num_bytes_ = num_bytes;
|
92
|
+
self->literal_costs_ = BROTLI_ALLOC(m, float, num_bytes + 2);
|
93
|
+
self->cost_dist_ = BROTLI_ALLOC(m, float, dist->alphabet_size_limit);
|
94
|
+
self->distance_histogram_size = dist->alphabet_size_limit;
|
95
|
+
if (BROTLI_IS_OOM(m)) return;
|
96
|
+
}
|
97
|
+
|
98
|
+
static void CleanupZopfliCostModel(MemoryManager* m, ZopfliCostModel* self) {
|
99
|
+
BROTLI_FREE(m, self->literal_costs_);
|
100
|
+
BROTLI_FREE(m, self->cost_dist_);
|
101
|
+
}
|
102
|
+
|
103
|
+
static void SetCost(const uint32_t* histogram, size_t histogram_size,
|
104
|
+
BROTLI_BOOL literal_histogram, float* cost) {
|
105
|
+
size_t sum = 0;
|
106
|
+
size_t missing_symbol_sum;
|
107
|
+
float log2sum;
|
108
|
+
float missing_symbol_cost;
|
109
|
+
size_t i;
|
110
|
+
for (i = 0; i < histogram_size; i++) {
|
111
|
+
sum += histogram[i];
|
112
|
+
}
|
113
|
+
log2sum = (float)FastLog2(sum);
|
114
|
+
missing_symbol_sum = sum;
|
115
|
+
if (!literal_histogram) {
|
116
|
+
for (i = 0; i < histogram_size; i++) {
|
117
|
+
if (histogram[i] == 0) missing_symbol_sum++;
|
118
|
+
}
|
119
|
+
}
|
120
|
+
missing_symbol_cost = (float)FastLog2(missing_symbol_sum) + 2;
|
121
|
+
for (i = 0; i < histogram_size; i++) {
|
122
|
+
if (histogram[i] == 0) {
|
123
|
+
cost[i] = missing_symbol_cost;
|
124
|
+
continue;
|
125
|
+
}
|
126
|
+
|
127
|
+
/* Shannon bits for this symbol. */
|
128
|
+
cost[i] = log2sum - (float)FastLog2(histogram[i]);
|
129
|
+
|
130
|
+
/* Cannot be coded with less than 1 bit */
|
131
|
+
if (cost[i] < 1) cost[i] = 1;
|
132
|
+
}
|
133
|
+
}
|
134
|
+
|
135
|
+
static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
|
136
|
+
size_t position,
|
137
|
+
const uint8_t* ringbuffer,
|
138
|
+
size_t ringbuffer_mask,
|
139
|
+
const Command* commands,
|
140
|
+
size_t num_commands,
|
141
|
+
size_t last_insert_len) {
|
142
|
+
uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS];
|
143
|
+
uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS];
|
144
|
+
uint32_t histogram_dist[BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE];
|
145
|
+
float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS];
|
146
|
+
size_t pos = position - last_insert_len;
|
147
|
+
float min_cost_cmd = kInfinity;
|
148
|
+
size_t i;
|
149
|
+
float* cost_cmd = self->cost_cmd_;
|
150
|
+
|
151
|
+
memset(histogram_literal, 0, sizeof(histogram_literal));
|
152
|
+
memset(histogram_cmd, 0, sizeof(histogram_cmd));
|
153
|
+
memset(histogram_dist, 0, sizeof(histogram_dist));
|
154
|
+
|
155
|
+
for (i = 0; i < num_commands; i++) {
|
156
|
+
size_t inslength = commands[i].insert_len_;
|
157
|
+
size_t copylength = CommandCopyLen(&commands[i]);
|
158
|
+
size_t distcode = commands[i].dist_prefix_ & 0x3FF;
|
159
|
+
size_t cmdcode = commands[i].cmd_prefix_;
|
160
|
+
size_t j;
|
161
|
+
|
162
|
+
histogram_cmd[cmdcode]++;
|
163
|
+
if (cmdcode >= 128) histogram_dist[distcode]++;
|
164
|
+
|
165
|
+
for (j = 0; j < inslength; j++) {
|
166
|
+
histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
|
167
|
+
}
|
168
|
+
|
169
|
+
pos += inslength + copylength;
|
170
|
+
}
|
171
|
+
|
172
|
+
SetCost(histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, BROTLI_TRUE,
|
173
|
+
cost_literal);
|
174
|
+
SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, BROTLI_FALSE,
|
175
|
+
cost_cmd);
|
176
|
+
SetCost(histogram_dist, self->distance_histogram_size, BROTLI_FALSE,
|
177
|
+
self->cost_dist_);
|
178
|
+
|
179
|
+
for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
|
180
|
+
min_cost_cmd = BROTLI_MIN(float, min_cost_cmd, cost_cmd[i]);
|
181
|
+
}
|
182
|
+
self->min_cost_cmd_ = min_cost_cmd;
|
183
|
+
|
184
|
+
{
|
185
|
+
float* literal_costs = self->literal_costs_;
|
186
|
+
float literal_carry = 0.0;
|
187
|
+
size_t num_bytes = self->num_bytes_;
|
188
|
+
literal_costs[0] = 0.0;
|
189
|
+
for (i = 0; i < num_bytes; ++i) {
|
190
|
+
literal_carry +=
|
191
|
+
cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
|
192
|
+
literal_costs[i + 1] = literal_costs[i] + literal_carry;
|
193
|
+
literal_carry -= literal_costs[i + 1] - literal_costs[i];
|
194
|
+
}
|
195
|
+
}
|
196
|
+
}
|
197
|
+
|
198
|
+
static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self,
|
199
|
+
size_t position,
|
200
|
+
const uint8_t* ringbuffer,
|
201
|
+
size_t ringbuffer_mask) {
|
202
|
+
float* literal_costs = self->literal_costs_;
|
203
|
+
float literal_carry = 0.0;
|
204
|
+
float* cost_dist = self->cost_dist_;
|
205
|
+
float* cost_cmd = self->cost_cmd_;
|
206
|
+
size_t num_bytes = self->num_bytes_;
|
207
|
+
size_t i;
|
208
|
+
BrotliEstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask,
|
209
|
+
ringbuffer, &literal_costs[1]);
|
210
|
+
literal_costs[0] = 0.0;
|
211
|
+
for (i = 0; i < num_bytes; ++i) {
|
212
|
+
literal_carry += literal_costs[i + 1];
|
213
|
+
literal_costs[i + 1] = literal_costs[i] + literal_carry;
|
214
|
+
literal_carry -= literal_costs[i + 1] - literal_costs[i];
|
215
|
+
}
|
216
|
+
for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
|
217
|
+
cost_cmd[i] = (float)FastLog2(11 + (uint32_t)i);
|
218
|
+
}
|
219
|
+
for (i = 0; i < self->distance_histogram_size; ++i) {
|
220
|
+
cost_dist[i] = (float)FastLog2(20 + (uint32_t)i);
|
221
|
+
}
|
222
|
+
self->min_cost_cmd_ = (float)FastLog2(11);
|
223
|
+
}
|
224
|
+
|
225
|
+
static BROTLI_INLINE float ZopfliCostModelGetCommandCost(
|
226
|
+
const ZopfliCostModel* self, uint16_t cmdcode) {
|
227
|
+
return self->cost_cmd_[cmdcode];
|
228
|
+
}
|
229
|
+
|
230
|
+
static BROTLI_INLINE float ZopfliCostModelGetDistanceCost(
|
231
|
+
const ZopfliCostModel* self, size_t distcode) {
|
232
|
+
return self->cost_dist_[distcode];
|
233
|
+
}
|
234
|
+
|
235
|
+
static BROTLI_INLINE float ZopfliCostModelGetLiteralCosts(
|
236
|
+
const ZopfliCostModel* self, size_t from, size_t to) {
|
237
|
+
return self->literal_costs_[to] - self->literal_costs_[from];
|
238
|
+
}
|
239
|
+
|
240
|
+
static BROTLI_INLINE float ZopfliCostModelGetMinCostCmd(
|
241
|
+
const ZopfliCostModel* self) {
|
242
|
+
return self->min_cost_cmd_;
|
243
|
+
}
|
244
|
+
|
245
|
+
/* REQUIRES: len >= 2, start_pos <= pos */
|
246
|
+
/* REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity */
|
247
|
+
/* Maintains the "ZopfliNode array invariant". */
|
248
|
+
static BROTLI_INLINE void UpdateZopfliNode(ZopfliNode* nodes, size_t pos,
|
249
|
+
size_t start_pos, size_t len, size_t len_code, size_t dist,
|
250
|
+
size_t short_code, float cost) {
|
251
|
+
ZopfliNode* next = &nodes[pos + len];
|
252
|
+
next->length = (uint32_t)(len | ((len + 9u - len_code) << 25));
|
253
|
+
next->distance = (uint32_t)dist;
|
254
|
+
next->dcode_insert_length = (uint32_t)(
|
255
|
+
(short_code << 27) | (pos - start_pos));
|
256
|
+
next->u.cost = cost;
|
257
|
+
}
|
258
|
+
|
259
|
+
typedef struct PosData {
|
260
|
+
size_t pos;
|
261
|
+
int distance_cache[4];
|
262
|
+
float costdiff;
|
263
|
+
float cost;
|
264
|
+
} PosData;
|
265
|
+
|
266
|
+
/* Maintains the smallest 8 cost difference together with their positions */
|
267
|
+
typedef struct StartPosQueue {
|
268
|
+
PosData q_[8];
|
269
|
+
size_t idx_;
|
270
|
+
} StartPosQueue;
|
271
|
+
|
272
|
+
static BROTLI_INLINE void InitStartPosQueue(StartPosQueue* self) {
|
273
|
+
self->idx_ = 0;
|
274
|
+
}
|
275
|
+
|
276
|
+
static size_t StartPosQueueSize(const StartPosQueue* self) {
|
277
|
+
return BROTLI_MIN(size_t, self->idx_, 8);
|
278
|
+
}
|
279
|
+
|
280
|
+
static void StartPosQueuePush(StartPosQueue* self, const PosData* posdata) {
|
281
|
+
size_t offset = ~(self->idx_++) & 7;
|
282
|
+
size_t len = StartPosQueueSize(self);
|
283
|
+
size_t i;
|
284
|
+
PosData* q = self->q_;
|
285
|
+
q[offset] = *posdata;
|
286
|
+
/* Restore the sorted order. In the list of |len| items at most |len - 1|
|
287
|
+
adjacent element comparisons / swaps are required. */
|
288
|
+
for (i = 1; i < len; ++i) {
|
289
|
+
if (q[offset & 7].costdiff > q[(offset + 1) & 7].costdiff) {
|
290
|
+
BROTLI_SWAP(PosData, q, offset & 7, (offset + 1) & 7);
|
291
|
+
}
|
292
|
+
++offset;
|
293
|
+
}
|
294
|
+
}
|
295
|
+
|
296
|
+
static const PosData* StartPosQueueAt(const StartPosQueue* self, size_t k) {
|
297
|
+
return &self->q_[(k - self->idx_) & 7];
|
298
|
+
}
|
299
|
+
|
300
|
+
/* Returns the minimum possible copy length that can improve the cost of any */
|
301
|
+
/* future position. */
|
302
|
+
static size_t ComputeMinimumCopyLength(const float start_cost,
|
303
|
+
const ZopfliNode* nodes,
|
304
|
+
const size_t num_bytes,
|
305
|
+
const size_t pos) {
|
306
|
+
/* Compute the minimum possible cost of reaching any future position. */
|
307
|
+
float min_cost = start_cost;
|
308
|
+
size_t len = 2;
|
309
|
+
size_t next_len_bucket = 4;
|
310
|
+
size_t next_len_offset = 10;
|
311
|
+
while (pos + len <= num_bytes && nodes[pos + len].u.cost <= min_cost) {
|
312
|
+
/* We already reached (pos + len) with no more cost than the minimum
|
313
|
+
possible cost of reaching anything from this pos, so there is no point in
|
314
|
+
looking for lengths <= len. */
|
315
|
+
++len;
|
316
|
+
if (len == next_len_offset) {
|
317
|
+
/* We reached the next copy length code bucket, so we add one more
|
318
|
+
extra bit to the minimum cost. */
|
319
|
+
min_cost += 1.0f;
|
320
|
+
next_len_offset += next_len_bucket;
|
321
|
+
next_len_bucket *= 2;
|
322
|
+
}
|
323
|
+
}
|
324
|
+
return len;
|
325
|
+
}
|
326
|
+
|
327
|
+
/* REQUIRES: nodes[pos].cost < kInfinity
|
328
|
+
REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
|
329
|
+
static uint32_t ComputeDistanceShortcut(const size_t block_start,
|
330
|
+
const size_t pos,
|
331
|
+
const size_t max_backward_limit,
|
332
|
+
const size_t gap,
|
333
|
+
const ZopfliNode* nodes) {
|
334
|
+
const size_t clen = ZopfliNodeCopyLength(&nodes[pos]);
|
335
|
+
const size_t ilen = nodes[pos].dcode_insert_length & 0x7FFFFFF;
|
336
|
+
const size_t dist = ZopfliNodeCopyDistance(&nodes[pos]);
|
337
|
+
/* Since |block_start + pos| is the end position of the command, the copy part
|
338
|
+
starts from |block_start + pos - clen|. Distances that are greater than
|
339
|
+
this or greater than |max_backward_limit| + |gap| are static dictionary
|
340
|
+
references, and do not update the last distances.
|
341
|
+
Also distance code 0 (last distance) does not update the last distances. */
|
342
|
+
if (pos == 0) {
|
343
|
+
return 0;
|
344
|
+
} else if (dist + clen <= block_start + pos + gap &&
|
345
|
+
dist <= max_backward_limit + gap &&
|
346
|
+
ZopfliNodeDistanceCode(&nodes[pos]) > 0) {
|
347
|
+
return (uint32_t)pos;
|
348
|
+
} else {
|
349
|
+
return nodes[pos - clen - ilen].u.shortcut;
|
350
|
+
}
|
351
|
+
}
|
352
|
+
|
353
|
+
/* Fills in dist_cache[0..3] with the last four distances (as defined by
|
354
|
+
Section 4. of the Spec) that would be used at (block_start + pos) if we
|
355
|
+
used the shortest path of commands from block_start, computed from
|
356
|
+
nodes[0..pos]. The last four distances at block_start are in
|
357
|
+
starting_dist_cache[0..3].
|
358
|
+
REQUIRES: nodes[pos].cost < kInfinity
|
359
|
+
REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
|
360
|
+
static void ComputeDistanceCache(const size_t pos,
|
361
|
+
const int* starting_dist_cache,
|
362
|
+
const ZopfliNode* nodes,
|
363
|
+
int* dist_cache) {
|
364
|
+
int idx = 0;
|
365
|
+
size_t p = nodes[pos].u.shortcut;
|
366
|
+
while (idx < 4 && p > 0) {
|
367
|
+
const size_t ilen = nodes[p].dcode_insert_length & 0x7FFFFFF;
|
368
|
+
const size_t clen = ZopfliNodeCopyLength(&nodes[p]);
|
369
|
+
const size_t dist = ZopfliNodeCopyDistance(&nodes[p]);
|
370
|
+
dist_cache[idx++] = (int)dist;
|
371
|
+
/* Because of prerequisite, p >= clen + ilen >= 2. */
|
372
|
+
p = nodes[p - clen - ilen].u.shortcut;
|
373
|
+
}
|
374
|
+
for (; idx < 4; ++idx) {
|
375
|
+
dist_cache[idx] = *starting_dist_cache++;
|
376
|
+
}
|
377
|
+
}
|
378
|
+
|
379
|
+
/* Maintains "ZopfliNode array invariant" and pushes node to the queue, if it
|
380
|
+
is eligible. */
|
381
|
+
static void EvaluateNode(
|
382
|
+
const size_t block_start, const size_t pos, const size_t max_backward_limit,
|
383
|
+
const size_t gap, const int* starting_dist_cache,
|
384
|
+
const ZopfliCostModel* model, StartPosQueue* queue, ZopfliNode* nodes) {
|
385
|
+
/* Save cost, because ComputeDistanceCache invalidates it. */
|
386
|
+
float node_cost = nodes[pos].u.cost;
|
387
|
+
nodes[pos].u.shortcut = ComputeDistanceShortcut(
|
388
|
+
block_start, pos, max_backward_limit, gap, nodes);
|
389
|
+
if (node_cost <= ZopfliCostModelGetLiteralCosts(model, 0, pos)) {
|
390
|
+
PosData posdata;
|
391
|
+
posdata.pos = pos;
|
392
|
+
posdata.cost = node_cost;
|
393
|
+
posdata.costdiff = node_cost -
|
394
|
+
ZopfliCostModelGetLiteralCosts(model, 0, pos);
|
395
|
+
ComputeDistanceCache(
|
396
|
+
pos, starting_dist_cache, nodes, posdata.distance_cache);
|
397
|
+
StartPosQueuePush(queue, &posdata);
|
398
|
+
}
|
399
|
+
}
|
400
|
+
|
401
|
+
/* Returns longest copy length. */
|
402
|
+
static size_t UpdateNodes(
|
403
|
+
const size_t num_bytes, const size_t block_start, const size_t pos,
|
404
|
+
const uint8_t* ringbuffer, const size_t ringbuffer_mask,
|
405
|
+
const BrotliEncoderParams* params, const size_t max_backward_limit,
|
406
|
+
const int* starting_dist_cache, const size_t num_matches,
|
407
|
+
const BackwardMatch* matches, const ZopfliCostModel* model,
|
408
|
+
StartPosQueue* queue, ZopfliNode* nodes) {
|
409
|
+
const size_t stream_offset = params->stream_offset;
|
410
|
+
const size_t cur_ix = block_start + pos;
|
411
|
+
const size_t cur_ix_masked = cur_ix & ringbuffer_mask;
|
412
|
+
const size_t max_distance = BROTLI_MIN(size_t, cur_ix, max_backward_limit);
|
413
|
+
const size_t dictionary_start = BROTLI_MIN(size_t,
|
414
|
+
cur_ix + stream_offset, max_backward_limit);
|
415
|
+
const size_t max_len = num_bytes - pos;
|
416
|
+
const size_t max_zopfli_len = MaxZopfliLen(params);
|
417
|
+
const size_t max_iters = MaxZopfliCandidates(params);
|
418
|
+
size_t min_len;
|
419
|
+
size_t result = 0;
|
420
|
+
size_t k;
|
421
|
+
size_t gap = 0;
|
422
|
+
|
423
|
+
EvaluateNode(block_start + stream_offset, pos, max_backward_limit, gap,
|
424
|
+
starting_dist_cache, model, queue, nodes);
|
425
|
+
|
426
|
+
{
|
427
|
+
const PosData* posdata = StartPosQueueAt(queue, 0);
|
428
|
+
float min_cost = (posdata->cost + ZopfliCostModelGetMinCostCmd(model) +
|
429
|
+
ZopfliCostModelGetLiteralCosts(model, posdata->pos, pos));
|
430
|
+
min_len = ComputeMinimumCopyLength(min_cost, nodes, num_bytes, pos);
|
431
|
+
}
|
432
|
+
|
433
|
+
/* Go over the command starting positions in order of increasing cost
|
434
|
+
difference. */
|
435
|
+
for (k = 0; k < max_iters && k < StartPosQueueSize(queue); ++k) {
|
436
|
+
const PosData* posdata = StartPosQueueAt(queue, k);
|
437
|
+
const size_t start = posdata->pos;
|
438
|
+
const uint16_t inscode = GetInsertLengthCode(pos - start);
|
439
|
+
const float start_costdiff = posdata->costdiff;
|
440
|
+
const float base_cost = start_costdiff + (float)GetInsertExtra(inscode) +
|
441
|
+
ZopfliCostModelGetLiteralCosts(model, 0, pos);
|
442
|
+
|
443
|
+
/* Look for last distance matches using the distance cache from this
|
444
|
+
starting position. */
|
445
|
+
size_t best_len = min_len - 1;
|
446
|
+
size_t j = 0;
|
447
|
+
for (; j < BROTLI_NUM_DISTANCE_SHORT_CODES && best_len < max_len; ++j) {
|
448
|
+
const size_t idx = kDistanceCacheIndex[j];
|
449
|
+
const size_t backward =
|
450
|
+
(size_t)(posdata->distance_cache[idx] + kDistanceCacheOffset[j]);
|
451
|
+
size_t prev_ix = cur_ix - backward;
|
452
|
+
size_t len = 0;
|
453
|
+
uint8_t continuation = ringbuffer[cur_ix_masked + best_len];
|
454
|
+
if (cur_ix_masked + best_len > ringbuffer_mask) {
|
455
|
+
break;
|
456
|
+
}
|
457
|
+
if (BROTLI_PREDICT_FALSE(backward > dictionary_start + gap)) {
|
458
|
+
/* Word dictionary -> ignore. */
|
459
|
+
continue;
|
460
|
+
}
|
461
|
+
if (backward <= max_distance) {
|
462
|
+
/* Regular backward reference. */
|
463
|
+
if (prev_ix >= cur_ix) {
|
464
|
+
continue;
|
465
|
+
}
|
466
|
+
|
467
|
+
prev_ix &= ringbuffer_mask;
|
468
|
+
if (prev_ix + best_len > ringbuffer_mask ||
|
469
|
+
continuation != ringbuffer[prev_ix + best_len]) {
|
470
|
+
continue;
|
471
|
+
}
|
472
|
+
len = FindMatchLengthWithLimit(&ringbuffer[prev_ix],
|
473
|
+
&ringbuffer[cur_ix_masked],
|
474
|
+
max_len);
|
475
|
+
} else {
|
476
|
+
/* "Gray" area. It is addressable by decoder, but this encoder
|
477
|
+
instance does not have that data -> should not touch it. */
|
478
|
+
continue;
|
479
|
+
}
|
480
|
+
{
|
481
|
+
const float dist_cost = base_cost +
|
482
|
+
ZopfliCostModelGetDistanceCost(model, j);
|
483
|
+
size_t l;
|
484
|
+
for (l = best_len + 1; l <= len; ++l) {
|
485
|
+
const uint16_t copycode = GetCopyLengthCode(l);
|
486
|
+
const uint16_t cmdcode =
|
487
|
+
CombineLengthCodes(inscode, copycode, j == 0);
|
488
|
+
const float cost = (cmdcode < 128 ? base_cost : dist_cost) +
|
489
|
+
(float)GetCopyExtra(copycode) +
|
490
|
+
ZopfliCostModelGetCommandCost(model, cmdcode);
|
491
|
+
if (cost < nodes[pos + l].u.cost) {
|
492
|
+
UpdateZopfliNode(nodes, pos, start, l, l, backward, j + 1, cost);
|
493
|
+
result = BROTLI_MAX(size_t, result, l);
|
494
|
+
}
|
495
|
+
best_len = l;
|
496
|
+
}
|
497
|
+
}
|
498
|
+
}
|
499
|
+
|
500
|
+
/* At higher iterations look only for new last distance matches, since
|
501
|
+
looking only for new command start positions with the same distances
|
502
|
+
does not help much. */
|
503
|
+
if (k >= 2) continue;
|
504
|
+
|
505
|
+
{
|
506
|
+
/* Loop through all possible copy lengths at this position. */
|
507
|
+
size_t len = min_len;
|
508
|
+
for (j = 0; j < num_matches; ++j) {
|
509
|
+
BackwardMatch match = matches[j];
|
510
|
+
size_t dist = match.distance;
|
511
|
+
BROTLI_BOOL is_dictionary_match =
|
512
|
+
TO_BROTLI_BOOL(dist > dictionary_start + gap);
|
513
|
+
/* We already tried all possible last distance matches, so we can use
|
514
|
+
normal distance code here. */
|
515
|
+
size_t dist_code = dist + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
|
516
|
+
uint16_t dist_symbol;
|
517
|
+
uint32_t distextra;
|
518
|
+
uint32_t distnumextra;
|
519
|
+
float dist_cost;
|
520
|
+
size_t max_match_len;
|
521
|
+
PrefixEncodeCopyDistance(
|
522
|
+
dist_code, params->dist.num_direct_distance_codes,
|
523
|
+
params->dist.distance_postfix_bits, &dist_symbol, &distextra);
|
524
|
+
distnumextra = dist_symbol >> 10;
|
525
|
+
dist_cost = base_cost + (float)distnumextra +
|
526
|
+
ZopfliCostModelGetDistanceCost(model, dist_symbol & 0x3FF);
|
527
|
+
|
528
|
+
/* Try all copy lengths up until the maximum copy length corresponding
|
529
|
+
to this distance. If the distance refers to the static dictionary, or
|
530
|
+
the maximum length is long enough, try only one maximum length. */
|
531
|
+
max_match_len = BackwardMatchLength(&match);
|
532
|
+
if (len < max_match_len &&
|
533
|
+
(is_dictionary_match || max_match_len > max_zopfli_len)) {
|
534
|
+
len = max_match_len;
|
535
|
+
}
|
536
|
+
for (; len <= max_match_len; ++len) {
|
537
|
+
const size_t len_code =
|
538
|
+
is_dictionary_match ? BackwardMatchLengthCode(&match) : len;
|
539
|
+
const uint16_t copycode = GetCopyLengthCode(len_code);
|
540
|
+
const uint16_t cmdcode = CombineLengthCodes(inscode, copycode, 0);
|
541
|
+
const float cost = dist_cost + (float)GetCopyExtra(copycode) +
|
542
|
+
ZopfliCostModelGetCommandCost(model, cmdcode);
|
543
|
+
if (cost < nodes[pos + len].u.cost) {
|
544
|
+
UpdateZopfliNode(nodes, pos, start, len, len_code, dist, 0, cost);
|
545
|
+
result = BROTLI_MAX(size_t, result, len);
|
546
|
+
}
|
547
|
+
}
|
548
|
+
}
|
549
|
+
}
|
550
|
+
}
|
551
|
+
return result;
|
552
|
+
}
|
553
|
+
|
554
|
+
static size_t ComputeShortestPathFromNodes(size_t num_bytes,
|
555
|
+
ZopfliNode* nodes) {
|
556
|
+
size_t index = num_bytes;
|
557
|
+
size_t num_commands = 0;
|
558
|
+
while ((nodes[index].dcode_insert_length & 0x7FFFFFF) == 0 &&
|
559
|
+
nodes[index].length == 1) --index;
|
560
|
+
nodes[index].u.next = BROTLI_UINT32_MAX;
|
561
|
+
while (index != 0) {
|
562
|
+
size_t len = ZopfliNodeCommandLength(&nodes[index]);
|
563
|
+
index -= len;
|
564
|
+
nodes[index].u.next = (uint32_t)len;
|
565
|
+
num_commands++;
|
566
|
+
}
|
567
|
+
return num_commands;
|
568
|
+
}
|
569
|
+
|
570
|
+
/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
|
571
|
+
void BrotliZopfliCreateCommands(const size_t num_bytes,
|
572
|
+
const size_t block_start, const ZopfliNode* nodes, int* dist_cache,
|
573
|
+
size_t* last_insert_len, const BrotliEncoderParams* params,
|
574
|
+
Command* commands, size_t* num_literals) {
|
575
|
+
const size_t stream_offset = params->stream_offset;
|
576
|
+
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
|
577
|
+
size_t pos = 0;
|
578
|
+
uint32_t offset = nodes[0].u.next;
|
579
|
+
size_t i;
|
580
|
+
size_t gap = 0;
|
581
|
+
for (i = 0; offset != BROTLI_UINT32_MAX; i++) {
|
582
|
+
const ZopfliNode* next = &nodes[pos + offset];
|
583
|
+
size_t copy_length = ZopfliNodeCopyLength(next);
|
584
|
+
size_t insert_length = next->dcode_insert_length & 0x7FFFFFF;
|
585
|
+
pos += insert_length;
|
586
|
+
offset = next->u.next;
|
587
|
+
if (i == 0) {
|
588
|
+
insert_length += *last_insert_len;
|
589
|
+
*last_insert_len = 0;
|
590
|
+
}
|
591
|
+
{
|
592
|
+
size_t distance = ZopfliNodeCopyDistance(next);
|
593
|
+
size_t len_code = ZopfliNodeLengthCode(next);
|
594
|
+
size_t dictionary_start = BROTLI_MIN(size_t,
|
595
|
+
block_start + pos + stream_offset, max_backward_limit);
|
596
|
+
BROTLI_BOOL is_dictionary =
|
597
|
+
TO_BROTLI_BOOL(distance > dictionary_start + gap);
|
598
|
+
size_t dist_code = ZopfliNodeDistanceCode(next);
|
599
|
+
InitCommand(&commands[i], ¶ms->dist, insert_length,
|
600
|
+
copy_length, (int)len_code - (int)copy_length, dist_code);
|
601
|
+
|
602
|
+
if (!is_dictionary && dist_code > 0) {
|
603
|
+
dist_cache[3] = dist_cache[2];
|
604
|
+
dist_cache[2] = dist_cache[1];
|
605
|
+
dist_cache[1] = dist_cache[0];
|
606
|
+
dist_cache[0] = (int)distance;
|
607
|
+
}
|
608
|
+
}
|
609
|
+
|
610
|
+
*num_literals += insert_length;
|
611
|
+
pos += copy_length;
|
612
|
+
}
|
613
|
+
*last_insert_len += num_bytes - pos;
|
614
|
+
}
|
615
|
+
|
616
|
+
static size_t ZopfliIterate(size_t num_bytes, size_t position,
|
617
|
+
const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
618
|
+
const BrotliEncoderParams* params, const size_t gap, const int* dist_cache,
|
619
|
+
const ZopfliCostModel* model, const uint32_t* num_matches,
|
620
|
+
const BackwardMatch* matches, ZopfliNode* nodes) {
|
621
|
+
const size_t stream_offset = params->stream_offset;
|
622
|
+
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
|
623
|
+
const size_t max_zopfli_len = MaxZopfliLen(params);
|
624
|
+
StartPosQueue queue;
|
625
|
+
size_t cur_match_pos = 0;
|
626
|
+
size_t i;
|
627
|
+
nodes[0].length = 0;
|
628
|
+
nodes[0].u.cost = 0;
|
629
|
+
InitStartPosQueue(&queue);
|
630
|
+
for (i = 0; i + 3 < num_bytes; i++) {
|
631
|
+
size_t skip = UpdateNodes(num_bytes, position, i, ringbuffer,
|
632
|
+
ringbuffer_mask, params, max_backward_limit, dist_cache,
|
633
|
+
num_matches[i], &matches[cur_match_pos], model, &queue, nodes);
|
634
|
+
if (skip < BROTLI_LONG_COPY_QUICK_STEP) skip = 0;
|
635
|
+
cur_match_pos += num_matches[i];
|
636
|
+
if (num_matches[i] == 1 &&
|
637
|
+
BackwardMatchLength(&matches[cur_match_pos - 1]) > max_zopfli_len) {
|
638
|
+
skip = BROTLI_MAX(size_t,
|
639
|
+
BackwardMatchLength(&matches[cur_match_pos - 1]), skip);
|
640
|
+
}
|
641
|
+
if (skip > 1) {
|
642
|
+
skip--;
|
643
|
+
while (skip) {
|
644
|
+
i++;
|
645
|
+
if (i + 3 >= num_bytes) break;
|
646
|
+
EvaluateNode(position + stream_offset, i, max_backward_limit, gap,
|
647
|
+
dist_cache, model, &queue, nodes);
|
648
|
+
cur_match_pos += num_matches[i];
|
649
|
+
skip--;
|
650
|
+
}
|
651
|
+
}
|
652
|
+
}
|
653
|
+
return ComputeShortestPathFromNodes(num_bytes, nodes);
|
654
|
+
}
|
655
|
+
|
656
|
+
/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
|
657
|
+
size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
|
658
|
+
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
659
|
+
ContextLut literal_context_lut, const BrotliEncoderParams* params,
|
660
|
+
const int* dist_cache, Hasher* hasher, ZopfliNode* nodes) {
|
661
|
+
const size_t stream_offset = params->stream_offset;
|
662
|
+
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
|
663
|
+
const size_t max_zopfli_len = MaxZopfliLen(params);
|
664
|
+
ZopfliCostModel model;
|
665
|
+
StartPosQueue queue;
|
666
|
+
BackwardMatch matches[2 * (MAX_NUM_MATCHES_H10 + 64)];
|
667
|
+
const size_t store_end = num_bytes >= StoreLookaheadH10() ?
|
668
|
+
position + num_bytes - StoreLookaheadH10() + 1 : position;
|
669
|
+
size_t i;
|
670
|
+
size_t gap = 0;
|
671
|
+
size_t lz_matches_offset = 0;
|
672
|
+
BROTLI_UNUSED(literal_context_lut);
|
673
|
+
nodes[0].length = 0;
|
674
|
+
nodes[0].u.cost = 0;
|
675
|
+
InitZopfliCostModel(m, &model, ¶ms->dist, num_bytes);
|
676
|
+
if (BROTLI_IS_OOM(m)) return 0;
|
677
|
+
ZopfliCostModelSetFromLiteralCosts(
|
678
|
+
&model, position, ringbuffer, ringbuffer_mask);
|
679
|
+
InitStartPosQueue(&queue);
|
680
|
+
for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; i++) {
|
681
|
+
const size_t pos = position + i;
|
682
|
+
const size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
|
683
|
+
const size_t dictionary_start = BROTLI_MIN(size_t,
|
684
|
+
pos + stream_offset, max_backward_limit);
|
685
|
+
size_t skip;
|
686
|
+
size_t num_matches;
|
687
|
+
num_matches = FindAllMatchesH10(&hasher->privat._H10,
|
688
|
+
¶ms->dictionary,
|
689
|
+
ringbuffer, ringbuffer_mask, pos, num_bytes - i, max_distance,
|
690
|
+
dictionary_start + gap, params, &matches[lz_matches_offset]);
|
691
|
+
if (num_matches > 0 &&
|
692
|
+
BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) {
|
693
|
+
matches[0] = matches[num_matches - 1];
|
694
|
+
num_matches = 1;
|
695
|
+
}
|
696
|
+
skip = UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
|
697
|
+
params, max_backward_limit, dist_cache, num_matches, matches, &model,
|
698
|
+
&queue, nodes);
|
699
|
+
if (skip < BROTLI_LONG_COPY_QUICK_STEP) skip = 0;
|
700
|
+
if (num_matches == 1 && BackwardMatchLength(&matches[0]) > max_zopfli_len) {
|
701
|
+
skip = BROTLI_MAX(size_t, BackwardMatchLength(&matches[0]), skip);
|
702
|
+
}
|
703
|
+
if (skip > 1) {
|
704
|
+
/* Add the tail of the copy to the hasher. */
|
705
|
+
StoreRangeH10(&hasher->privat._H10,
|
706
|
+
ringbuffer, ringbuffer_mask, pos + 1, BROTLI_MIN(
|
707
|
+
size_t, pos + skip, store_end));
|
708
|
+
skip--;
|
709
|
+
while (skip) {
|
710
|
+
i++;
|
711
|
+
if (i + HashTypeLengthH10() - 1 >= num_bytes) break;
|
712
|
+
EvaluateNode(position + stream_offset, i, max_backward_limit, gap,
|
713
|
+
dist_cache, &model, &queue, nodes);
|
714
|
+
skip--;
|
715
|
+
}
|
716
|
+
}
|
717
|
+
}
|
718
|
+
CleanupZopfliCostModel(m, &model);
|
719
|
+
return ComputeShortestPathFromNodes(num_bytes, nodes);
|
720
|
+
}
|
721
|
+
|
722
|
+
void BrotliCreateZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
|
723
|
+
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
724
|
+
ContextLut literal_context_lut, const BrotliEncoderParams* params,
|
725
|
+
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
|
726
|
+
Command* commands, size_t* num_commands, size_t* num_literals) {
|
727
|
+
ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
|
728
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) return;
|
729
|
+
BrotliInitZopfliNodes(nodes, num_bytes + 1);
|
730
|
+
*num_commands += BrotliZopfliComputeShortestPath(m, num_bytes,
|
731
|
+
position, ringbuffer, ringbuffer_mask, literal_context_lut, params,
|
732
|
+
dist_cache, hasher, nodes);
|
733
|
+
if (BROTLI_IS_OOM(m)) return;
|
734
|
+
BrotliZopfliCreateCommands(num_bytes, position, nodes, dist_cache,
|
735
|
+
last_insert_len, params, commands, num_literals);
|
736
|
+
BROTLI_FREE(m, nodes);
|
737
|
+
}
|
738
|
+
|
739
|
+
void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
|
740
|
+
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
741
|
+
ContextLut literal_context_lut, const BrotliEncoderParams* params,
|
742
|
+
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
|
743
|
+
Command* commands, size_t* num_commands, size_t* num_literals) {
|
744
|
+
const size_t stream_offset = params->stream_offset;
|
745
|
+
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
|
746
|
+
uint32_t* num_matches = BROTLI_ALLOC(m, uint32_t, num_bytes);
|
747
|
+
size_t matches_size = 4 * num_bytes;
|
748
|
+
const size_t store_end = num_bytes >= StoreLookaheadH10() ?
|
749
|
+
position + num_bytes - StoreLookaheadH10() + 1 : position;
|
750
|
+
size_t cur_match_pos = 0;
|
751
|
+
size_t i;
|
752
|
+
size_t orig_num_literals;
|
753
|
+
size_t orig_last_insert_len;
|
754
|
+
int orig_dist_cache[4];
|
755
|
+
size_t orig_num_commands;
|
756
|
+
ZopfliCostModel model;
|
757
|
+
ZopfliNode* nodes;
|
758
|
+
BackwardMatch* matches = BROTLI_ALLOC(m, BackwardMatch, matches_size);
|
759
|
+
size_t gap = 0;
|
760
|
+
size_t shadow_matches = 0;
|
761
|
+
BROTLI_UNUSED(literal_context_lut);
|
762
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(num_matches) ||
|
763
|
+
BROTLI_IS_NULL(matches)) {
|
764
|
+
return;
|
765
|
+
}
|
766
|
+
for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; ++i) {
|
767
|
+
const size_t pos = position + i;
|
768
|
+
size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
|
769
|
+
size_t dictionary_start = BROTLI_MIN(size_t,
|
770
|
+
pos + stream_offset, max_backward_limit);
|
771
|
+
size_t max_length = num_bytes - i;
|
772
|
+
size_t num_found_matches;
|
773
|
+
size_t cur_match_end;
|
774
|
+
size_t j;
|
775
|
+
/* Ensure that we have enough free slots. */
|
776
|
+
BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size,
|
777
|
+
cur_match_pos + MAX_NUM_MATCHES_H10 + shadow_matches);
|
778
|
+
if (BROTLI_IS_OOM(m)) return;
|
779
|
+
num_found_matches = FindAllMatchesH10(&hasher->privat._H10,
|
780
|
+
¶ms->dictionary,
|
781
|
+
ringbuffer, ringbuffer_mask, pos, max_length,
|
782
|
+
max_distance, dictionary_start + gap, params,
|
783
|
+
&matches[cur_match_pos + shadow_matches]);
|
784
|
+
cur_match_end = cur_match_pos + num_found_matches;
|
785
|
+
for (j = cur_match_pos; j + 1 < cur_match_end; ++j) {
|
786
|
+
BROTLI_DCHECK(BackwardMatchLength(&matches[j]) <=
|
787
|
+
BackwardMatchLength(&matches[j + 1]));
|
788
|
+
}
|
789
|
+
num_matches[i] = (uint32_t)num_found_matches;
|
790
|
+
if (num_found_matches > 0) {
|
791
|
+
const size_t match_len = BackwardMatchLength(&matches[cur_match_end - 1]);
|
792
|
+
if (match_len > MAX_ZOPFLI_LEN_QUALITY_11) {
|
793
|
+
const size_t skip = match_len - 1;
|
794
|
+
matches[cur_match_pos++] = matches[cur_match_end - 1];
|
795
|
+
num_matches[i] = 1;
|
796
|
+
/* Add the tail of the copy to the hasher. */
|
797
|
+
StoreRangeH10(&hasher->privat._H10,
|
798
|
+
ringbuffer, ringbuffer_mask, pos + 1,
|
799
|
+
BROTLI_MIN(size_t, pos + match_len, store_end));
|
800
|
+
memset(&num_matches[i + 1], 0, skip * sizeof(num_matches[0]));
|
801
|
+
i += skip;
|
802
|
+
} else {
|
803
|
+
cur_match_pos = cur_match_end;
|
804
|
+
}
|
805
|
+
}
|
806
|
+
}
|
807
|
+
orig_num_literals = *num_literals;
|
808
|
+
orig_last_insert_len = *last_insert_len;
|
809
|
+
memcpy(orig_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
|
810
|
+
orig_num_commands = *num_commands;
|
811
|
+
nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
|
812
|
+
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) return;
|
813
|
+
InitZopfliCostModel(m, &model, ¶ms->dist, num_bytes);
|
814
|
+
if (BROTLI_IS_OOM(m)) return;
|
815
|
+
for (i = 0; i < 2; i++) {
|
816
|
+
BrotliInitZopfliNodes(nodes, num_bytes + 1);
|
817
|
+
if (i == 0) {
|
818
|
+
ZopfliCostModelSetFromLiteralCosts(
|
819
|
+
&model, position, ringbuffer, ringbuffer_mask);
|
820
|
+
} else {
|
821
|
+
ZopfliCostModelSetFromCommands(&model, position, ringbuffer,
|
822
|
+
ringbuffer_mask, commands, *num_commands - orig_num_commands,
|
823
|
+
orig_last_insert_len);
|
824
|
+
}
|
825
|
+
*num_commands = orig_num_commands;
|
826
|
+
*num_literals = orig_num_literals;
|
827
|
+
*last_insert_len = orig_last_insert_len;
|
828
|
+
memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
|
829
|
+
*num_commands += ZopfliIterate(num_bytes, position, ringbuffer,
|
830
|
+
ringbuffer_mask, params, gap, dist_cache, &model, num_matches, matches,
|
831
|
+
nodes);
|
832
|
+
BrotliZopfliCreateCommands(num_bytes, position, nodes, dist_cache,
|
833
|
+
last_insert_len, params, commands, num_literals);
|
834
|
+
}
|
835
|
+
CleanupZopfliCostModel(m, &model);
|
836
|
+
BROTLI_FREE(m, nodes);
|
837
|
+
BROTLI_FREE(m, matches);
|
838
|
+
BROTLI_FREE(m, num_matches);
|
839
|
+
}
|
840
|
+
|
841
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
842
|
+
} /* extern "C" */
|
843
|
+
#endif
|