isomorfeus-ferret 0.12.5 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +101 -19
- data/README.md +54 -4
- data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
- data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
- data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
- data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
- data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
- data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
- data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
- data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
- data/ext/isomorfeus_ferret_ext/bzip_blocksort.c +1094 -0
- data/ext/isomorfeus_ferret_ext/bzip_huffman.c +205 -0
- data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
- data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
- data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
- data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
- data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
- data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
- data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
- data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
- data/ext/isomorfeus_ferret_ext/frb_index.c +513 -464
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
- data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
- data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
- data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
- data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
- data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
- data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
- data/ext/isomorfeus_ferret_ext/frt_document.h +10 -9
- data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
- data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +105 -63
- data/ext/isomorfeus_ferret_ext/frt_global.h +7 -3
- data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
- data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_index.c +714 -384
- data/ext/isomorfeus_ferret_ext/frt_index.h +274 -290
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +67 -91
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +26 -25
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +46 -84
- data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
- data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
- data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
- data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
- data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +21 -109
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
- data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
- data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
- data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
- data/ext/isomorfeus_ferret_ext/test.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
- data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
- data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
- data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_fields.c +111 -100
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
- data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
- data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
- data/ext/isomorfeus_ferret_ext/test_index.c +373 -363
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
- data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -62
- data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
- data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
- data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
- data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +113 -58
- data/ext/isomorfeus_ferret_ext/email.rl +0 -21
- data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
- data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
- data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
- data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
- data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
- data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
- data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
- data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
- data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
- data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -0,0 +1,212 @@
|
|
1
|
+
/* NOLINT(build/header_guard) */
|
2
|
+
/* Copyright 2018 Google Inc. All Rights Reserved.
|
3
|
+
|
4
|
+
Distributed under MIT license.
|
5
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
6
|
+
*/
|
7
|
+
|
8
|
+
/* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */
|
9
|
+
/* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */
|
10
|
+
/* JUMP = skip bytes for speedup */
|
11
|
+
|
12
|
+
/* Rolling hash for long distance long string matches. Stores one position
|
13
|
+
per bucket, bucket key is computed over a long region. */
|
14
|
+
|
15
|
+
#define HashRolling HASHER()
|
16
|
+
|
17
|
+
/* Multiplier applied to the rolling hash state on every step. */
static const uint32_t FN(kRollingHashMul32) = 69069;
/* Sentinel stored in a table slot that has not recorded any position yet. */
static const uint32_t FN(kInvalidPos) = 0xffffffff;
|
19
|
+
|
20
|
+
/* This hasher uses a longer forward length, but returning a higher value here
|
21
|
+
will hurt compression by the main hasher when combined with a composite
|
22
|
+
hasher. The hasher tests for forward itself instead. */
|
23
|
+
static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
|
24
|
+
/* Reports the store lookahead of this hasher, which is always 4. */
static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
  return 4;
}
|
25
|
+
|
26
|
+
/* Computes a code from a single byte. A lookup table of 256 values could be
|
27
|
+
used, but simply adding 1 works about as good. */
|
28
|
+
static uint32_t FN(HashByte)(uint8_t byte) {
|
29
|
+
return (uint32_t)byte + 1u;
|
30
|
+
}
|
31
|
+
|
32
|
+
static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add,
|
33
|
+
uint32_t factor) {
|
34
|
+
return (uint32_t)(factor * state + FN(HashByte)(add));
|
35
|
+
}
|
36
|
+
|
37
|
+
static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add,
|
38
|
+
uint8_t rem, uint32_t factor,
|
39
|
+
uint32_t factor_remove) {
|
40
|
+
return (uint32_t)(factor * state +
|
41
|
+
FN(HashByte)(add) - factor_remove * FN(HashByte)(rem));
|
42
|
+
}
|
43
|
+
|
44
|
+
typedef struct HashRolling {
  /* Current value of the rolling hash. */
  uint32_t state;
  /* One stored position per bucket; unused slots hold kInvalidPos. */
  uint32_t* table;
  /* Next input position the rolling hash has to be advanced over. */
  size_t next_ix;

  /* NOTE(review): not written by any function visible in this file. */
  uint32_t chunk_len;
  /* Multiplier applied to the state on each step. */
  uint32_t factor;
  /* Multiplier applied to the code of the byte leaving the window. */
  uint32_t factor_remove;
} HashRolling;
|
53
|
+
|
54
|
+
static void FN(Initialize)(
|
55
|
+
HasherCommon* common, HashRolling* BROTLI_RESTRICT self,
|
56
|
+
const BrotliEncoderParams* params) {
|
57
|
+
size_t i;
|
58
|
+
self->state = 0;
|
59
|
+
self->next_ix = 0;
|
60
|
+
|
61
|
+
self->factor = FN(kRollingHashMul32);
|
62
|
+
|
63
|
+
/* Compute the factor of the oldest byte to remove: factor**steps modulo
|
64
|
+
0xffffffff (the multiplications rely on 32-bit overflow) */
|
65
|
+
self->factor_remove = 1;
|
66
|
+
for (i = 0; i < CHUNKLEN; i += JUMP) {
|
67
|
+
self->factor_remove *= self->factor;
|
68
|
+
}
|
69
|
+
|
70
|
+
self->table = (uint32_t*)common->extra;
|
71
|
+
for (i = 0; i < NUMBUCKETS; i++) {
|
72
|
+
self->table[i] = FN(kInvalidPos);
|
73
|
+
}
|
74
|
+
|
75
|
+
BROTLI_UNUSED(params);
|
76
|
+
}
|
77
|
+
|
78
|
+
/* Seeds the rolling hash from the first CHUNKLEN bytes of data, sampling
   every JUMP-th byte. When fewer than CHUNKLEN bytes are available the hasher
   cannot be used and its state is left untouched. one_shot is not used. */
static void FN(Prepare)(HashRolling* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
  const uint32_t multiplier = self->factor;
  uint32_t rolling = 0;
  size_t offset;

  BROTLI_UNUSED(one_shot);

  if (input_size < CHUNKLEN) {
    return;
  }
  for (offset = 0; offset < CHUNKLEN; offset += JUMP) {
    rolling = FN(HashRollingFunctionInitial)(rolling, data[offset], multiplier);
  }
  self->state = rolling;
}
|
90
|
+
|
91
|
+
/* Returns the number of bytes of extra memory this hasher needs: one
   uint32_t slot per bucket. The size does not depend on any of the
   arguments. */
static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
    size_t input_size) {
  /* Mark the parameters as used before returning; placed after the return
     statement these would be unreachable code. */
  BROTLI_UNUSED(params);
  BROTLI_UNUSED(one_shot);
  BROTLI_UNUSED(input_size);
  return NUMBUCKETS * sizeof(uint32_t);
}
|
99
|
+
|
100
|
+
static BROTLI_INLINE void FN(Store)(HashRolling* BROTLI_RESTRICT self,
|
101
|
+
const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
|
102
|
+
BROTLI_UNUSED(self);
|
103
|
+
BROTLI_UNUSED(data);
|
104
|
+
BROTLI_UNUSED(mask);
|
105
|
+
BROTLI_UNUSED(ix);
|
106
|
+
}
|
107
|
+
|
108
|
+
static BROTLI_INLINE void FN(StoreRange)(HashRolling* BROTLI_RESTRICT self,
|
109
|
+
const uint8_t* BROTLI_RESTRICT data, const size_t mask,
|
110
|
+
const size_t ix_start, const size_t ix_end) {
|
111
|
+
BROTLI_UNUSED(self);
|
112
|
+
BROTLI_UNUSED(data);
|
113
|
+
BROTLI_UNUSED(mask);
|
114
|
+
BROTLI_UNUSED(ix_start);
|
115
|
+
BROTLI_UNUSED(ix_end);
|
116
|
+
}
|
117
|
+
|
118
|
+
static BROTLI_INLINE void FN(StitchToPreviousBlock)(
|
119
|
+
HashRolling* BROTLI_RESTRICT self,
|
120
|
+
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
|
121
|
+
size_t ring_buffer_mask) {
|
122
|
+
/* In this case we must re-initialize the hasher from scratch from the
|
123
|
+
current position. */
|
124
|
+
size_t position_masked;
|
125
|
+
size_t available = num_bytes;
|
126
|
+
if ((position & (JUMP - 1)) != 0) {
|
127
|
+
size_t diff = JUMP - (position & (JUMP - 1));
|
128
|
+
available = (diff > available) ? 0 : (available - diff);
|
129
|
+
position += diff;
|
130
|
+
}
|
131
|
+
position_masked = position & ring_buffer_mask;
|
132
|
+
/* wrapping around ringbuffer not handled. */
|
133
|
+
if (available > ring_buffer_mask - position_masked) {
|
134
|
+
available = ring_buffer_mask - position_masked;
|
135
|
+
}
|
136
|
+
|
137
|
+
FN(Prepare)(self, BROTLI_FALSE, available,
|
138
|
+
ringbuffer + (position & ring_buffer_mask));
|
139
|
+
self->next_ix = position;
|
140
|
+
BROTLI_UNUSED(num_bytes);
|
141
|
+
}
|
142
|
+
|
143
|
+
static BROTLI_INLINE void FN(PrepareDistanceCache)(
|
144
|
+
HashRolling* BROTLI_RESTRICT self,
|
145
|
+
int* BROTLI_RESTRICT distance_cache) {
|
146
|
+
BROTLI_UNUSED(self);
|
147
|
+
BROTLI_UNUSED(distance_cache);
|
148
|
+
}
|
149
|
+
|
150
|
+
static BROTLI_INLINE void FN(FindLongestMatch)(
|
151
|
+
HashRolling* BROTLI_RESTRICT self,
|
152
|
+
const BrotliEncoderDictionary* dictionary,
|
153
|
+
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
|
154
|
+
const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
|
155
|
+
const size_t max_length, const size_t max_backward,
|
156
|
+
const size_t dictionary_distance, const size_t max_distance,
|
157
|
+
HasherSearchResult* BROTLI_RESTRICT out) {
|
158
|
+
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
159
|
+
size_t pos;
|
160
|
+
|
161
|
+
if ((cur_ix & (JUMP - 1)) != 0) return;
|
162
|
+
|
163
|
+
/* Not enough lookahead */
|
164
|
+
if (max_length < CHUNKLEN) return;
|
165
|
+
|
166
|
+
for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) {
|
167
|
+
uint32_t code = self->state & MASK;
|
168
|
+
|
169
|
+
uint8_t rem = data[pos & ring_buffer_mask];
|
170
|
+
uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask];
|
171
|
+
size_t found_ix = FN(kInvalidPos);
|
172
|
+
|
173
|
+
self->state = FN(HashRollingFunction)(
|
174
|
+
self->state, add, rem, self->factor, self->factor_remove);
|
175
|
+
|
176
|
+
if (code < NUMBUCKETS) {
|
177
|
+
found_ix = self->table[code];
|
178
|
+
self->table[code] = (uint32_t)pos;
|
179
|
+
if (pos == cur_ix && found_ix != FN(kInvalidPos)) {
|
180
|
+
/* The cast to 32-bit makes backward distances up to 4GB work even
|
181
|
+
if cur_ix is above 4GB, despite using 32-bit values in the table. */
|
182
|
+
size_t backward = (uint32_t)(cur_ix - found_ix);
|
183
|
+
if (backward <= max_backward) {
|
184
|
+
const size_t found_ix_masked = found_ix & ring_buffer_mask;
|
185
|
+
const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked],
|
186
|
+
&data[cur_ix_masked],
|
187
|
+
max_length);
|
188
|
+
if (len >= 4 && len > out->len) {
|
189
|
+
score_t score = BackwardReferenceScore(len, backward);
|
190
|
+
if (score > out->score) {
|
191
|
+
out->len = len;
|
192
|
+
out->distance = backward;
|
193
|
+
out->score = score;
|
194
|
+
out->len_code_delta = 0;
|
195
|
+
}
|
196
|
+
}
|
197
|
+
}
|
198
|
+
}
|
199
|
+
}
|
200
|
+
}
|
201
|
+
|
202
|
+
self->next_ix = cur_ix + JUMP;
|
203
|
+
|
204
|
+
/* NOTE: this hasher does not search in the dictionary. It is used as
|
205
|
+
backup-hasher, the main hasher already searches in it. */
|
206
|
+
BROTLI_UNUSED(dictionary);
|
207
|
+
BROTLI_UNUSED(distance_cache);
|
208
|
+
BROTLI_UNUSED(dictionary_distance);
|
209
|
+
BROTLI_UNUSED(max_distance);
|
210
|
+
}
|
211
|
+
|
212
|
+
#undef HashRolling
|
@@ -0,0 +1,329 @@
|
|
1
|
+
/* NOLINT(build/header_guard) */
|
2
|
+
/* Copyright 2016 Google Inc. All Rights Reserved.
|
3
|
+
|
4
|
+
Distributed under MIT license.
|
5
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
6
|
+
*/
|
7
|
+
|
8
|
+
/* template parameters: FN, BUCKET_BITS, MAX_TREE_COMP_LENGTH,
|
9
|
+
MAX_TREE_SEARCH_DEPTH */
|
10
|
+
|
11
|
+
/* A (forgetful) hash table where each hash bucket contains a binary tree of
|
12
|
+
sequences whose first 4 bytes share the same hash code.
|
13
|
+
Each sequence is MAX_TREE_COMP_LENGTH long and is identified by its starting
|
14
|
+
position in the input data. The binary tree is sorted by the lexicographic
|
15
|
+
order of the sequences, and it is also a max-heap with respect to the
|
16
|
+
starting positions. */
|
17
|
+
|
18
|
+
#define HashToBinaryTree HASHER()
|
19
|
+
|
20
|
+
#define BUCKET_SIZE (1 << BUCKET_BITS)
|
21
|
+
|
22
|
+
/* Reports the hash type length of this hasher, which is always 4. */
static BROTLI_INLINE size_t FN(HashTypeLength)(void) {
  return 4;
}
|
23
|
+
/* Reports the store lookahead of this hasher: MAX_TREE_COMP_LENGTH. */
static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
  const size_t lookahead = MAX_TREE_COMP_LENGTH;
  return lookahead;
}
|
26
|
+
|
27
|
+
/* Hashes the 4 bytes at data into a bucket index of BUCKET_BITS bits. The
   bytes are loaded as a little-endian 32-bit word and multiplied by
   kHashMul32; the top bits of the product are returned because the higher
   bits contain more mixture from the multiplication. */
static uint32_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data) {
  const uint32_t word = BROTLI_UNALIGNED_LOAD32LE(data);
  const uint32_t mixed = word * kHashMul32;
  return mixed >> (32 - BUCKET_BITS);
}
|
33
|
+
|
34
|
+
typedef struct HashToBinaryTree {
  /* Window size minus one; used as a mask on positions. */
  size_t window_mask_;

  /* Maps the hash of a 4-byte sequence to the last position where that hash
     was seen. That position is the root of the binary tree holding all
     sequences that share the bucket. */
  uint32_t* buckets_;  /* uint32_t[BUCKET_SIZE]; */

  /* Position value that denotes "no sequence": a tree whose root is
     invalid_pos_ is empty, and a node whose two children are both
     invalid_pos_ is a leaf. */
  uint32_t invalid_pos_;

  /* --- Dynamic size members --- */

  /* All per-bucket binary trees stored together. The tree for a hash is
     rooted at the sequence starting at buckets_[hash]; the left and right
     children of the sequence starting at pos are kept in forest_[2 * pos]
     and forest_[2 * pos + 1]. */
  uint32_t* forest_;  /* uint32_t[2 * num_nodes] */
} HashToBinaryTree;
|
56
|
+
|
57
|
+
static void FN(Initialize)(
|
58
|
+
HasherCommon* common, HashToBinaryTree* BROTLI_RESTRICT self,
|
59
|
+
const BrotliEncoderParams* params) {
|
60
|
+
self->buckets_ = (uint32_t*)common->extra;
|
61
|
+
self->forest_ = &self->buckets_[BUCKET_SIZE];
|
62
|
+
|
63
|
+
self->window_mask_ = (1u << params->lgwin) - 1u;
|
64
|
+
self->invalid_pos_ = (uint32_t)(0 - self->window_mask_);
|
65
|
+
}
|
66
|
+
|
67
|
+
static void FN(Prepare)
|
68
|
+
(HashToBinaryTree* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
|
69
|
+
size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
|
70
|
+
uint32_t invalid_pos = self->invalid_pos_;
|
71
|
+
uint32_t i;
|
72
|
+
uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
|
73
|
+
BROTLI_UNUSED(data);
|
74
|
+
BROTLI_UNUSED(one_shot);
|
75
|
+
BROTLI_UNUSED(input_size);
|
76
|
+
for (i = 0; i < BUCKET_SIZE; i++) {
|
77
|
+
buckets[i] = invalid_pos;
|
78
|
+
}
|
79
|
+
}
|
80
|
+
|
81
|
+
/* Returns the number of bytes of extra memory this hasher needs:
   BUCKET_SIZE bucket slots plus two forest slots per node. The node count is
   the window size (2^lgwin), or input_size when running one-shot on an input
   smaller than the window. */
static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
    size_t input_size) {
  const size_t window_size = (size_t)1 << params->lgwin;
  const size_t num_nodes =
      (one_shot && input_size < window_size) ? input_size : window_size;
  const size_t bucket_bytes = sizeof(uint32_t) * BUCKET_SIZE;
  const size_t forest_bytes = 2 * sizeof(uint32_t) * num_nodes;
  return bucket_bytes + forest_bytes;
}
|
90
|
+
|
91
|
+
/* Returns the forest index that holds the left child of the sequence
   starting at pos: twice the position masked to the window. */
static BROTLI_INLINE size_t FN(LeftChildIndex)(
    HashToBinaryTree* BROTLI_RESTRICT self,
    const size_t pos) {
  const size_t node = pos & self->window_mask_;
  return 2 * node;
}
|
96
|
+
|
97
|
+
/* Returns the forest index that holds the right child of the sequence
   starting at pos: twice the position masked to the window, plus one. */
static BROTLI_INLINE size_t FN(RightChildIndex)(
    HashToBinaryTree* BROTLI_RESTRICT self,
    const size_t pos) {
  const size_t node = pos & self->window_mask_;
  return 2 * node + 1;
}
|
102
|
+
|
103
|
+
/* Stores the hash of the next 4 bytes and in a single tree-traversal, the
|
104
|
+
hash bucket's binary tree is searched for matches and is re-rooted at the
|
105
|
+
current position.
|
106
|
+
|
107
|
+
If less than MAX_TREE_COMP_LENGTH data is available, the hash bucket of the
|
108
|
+
current position is searched for matches, but the state of the hash table
|
109
|
+
is not changed, since we can not know the final sorting order of the
|
110
|
+
current (incomplete) sequence.
|
111
|
+
|
112
|
+
This function must be called with increasing cur_ix positions. */
|
113
|
+
static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
|
114
|
+
HashToBinaryTree* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
|
115
|
+
const size_t cur_ix, const size_t ring_buffer_mask, const size_t max_length,
|
116
|
+
const size_t max_backward, size_t* const BROTLI_RESTRICT best_len,
|
117
|
+
BackwardMatch* BROTLI_RESTRICT matches) {
|
118
|
+
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
119
|
+
const size_t max_comp_len =
|
120
|
+
BROTLI_MIN(size_t, max_length, MAX_TREE_COMP_LENGTH);
|
121
|
+
const BROTLI_BOOL should_reroot_tree =
|
122
|
+
TO_BROTLI_BOOL(max_length >= MAX_TREE_COMP_LENGTH);
|
123
|
+
const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
|
124
|
+
uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
|
125
|
+
uint32_t* BROTLI_RESTRICT forest = self->forest_;
|
126
|
+
size_t prev_ix = buckets[key];
|
127
|
+
/* The forest index of the rightmost node of the left subtree of the new
|
128
|
+
root, updated as we traverse and re-root the tree of the hash bucket. */
|
129
|
+
size_t node_left = FN(LeftChildIndex)(self, cur_ix);
|
130
|
+
/* The forest index of the leftmost node of the right subtree of the new
|
131
|
+
root, updated as we traverse and re-root the tree of the hash bucket. */
|
132
|
+
size_t node_right = FN(RightChildIndex)(self, cur_ix);
|
133
|
+
/* The match length of the rightmost node of the left subtree of the new
|
134
|
+
root, updated as we traverse and re-root the tree of the hash bucket. */
|
135
|
+
size_t best_len_left = 0;
|
136
|
+
/* The match length of the leftmost node of the right subtree of the new
|
137
|
+
root, updated as we traverse and re-root the tree of the hash bucket. */
|
138
|
+
size_t best_len_right = 0;
|
139
|
+
size_t depth_remaining;
|
140
|
+
if (should_reroot_tree) {
|
141
|
+
buckets[key] = (uint32_t)cur_ix;
|
142
|
+
}
|
143
|
+
for (depth_remaining = MAX_TREE_SEARCH_DEPTH; ; --depth_remaining) {
|
144
|
+
const size_t backward = cur_ix - prev_ix;
|
145
|
+
const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
|
146
|
+
if (backward == 0 || backward > max_backward || depth_remaining == 0) {
|
147
|
+
if (should_reroot_tree) {
|
148
|
+
forest[node_left] = self->invalid_pos_;
|
149
|
+
forest[node_right] = self->invalid_pos_;
|
150
|
+
}
|
151
|
+
break;
|
152
|
+
}
|
153
|
+
{
|
154
|
+
const size_t cur_len = BROTLI_MIN(size_t, best_len_left, best_len_right);
|
155
|
+
size_t len;
|
156
|
+
BROTLI_DCHECK(cur_len <= MAX_TREE_COMP_LENGTH);
|
157
|
+
len = cur_len +
|
158
|
+
FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],
|
159
|
+
&data[prev_ix_masked + cur_len],
|
160
|
+
max_length - cur_len);
|
161
|
+
BROTLI_DCHECK(
|
162
|
+
0 == memcmp(&data[cur_ix_masked], &data[prev_ix_masked], len));
|
163
|
+
if (matches && len > *best_len) {
|
164
|
+
*best_len = len;
|
165
|
+
InitBackwardMatch(matches++, backward, len);
|
166
|
+
}
|
167
|
+
if (len >= max_comp_len) {
|
168
|
+
if (should_reroot_tree) {
|
169
|
+
forest[node_left] = forest[FN(LeftChildIndex)(self, prev_ix)];
|
170
|
+
forest[node_right] = forest[FN(RightChildIndex)(self, prev_ix)];
|
171
|
+
}
|
172
|
+
break;
|
173
|
+
}
|
174
|
+
if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {
|
175
|
+
best_len_left = len;
|
176
|
+
if (should_reroot_tree) {
|
177
|
+
forest[node_left] = (uint32_t)prev_ix;
|
178
|
+
}
|
179
|
+
node_left = FN(RightChildIndex)(self, prev_ix);
|
180
|
+
prev_ix = forest[node_left];
|
181
|
+
} else {
|
182
|
+
best_len_right = len;
|
183
|
+
if (should_reroot_tree) {
|
184
|
+
forest[node_right] = (uint32_t)prev_ix;
|
185
|
+
}
|
186
|
+
node_right = FN(LeftChildIndex)(self, prev_ix);
|
187
|
+
prev_ix = forest[node_right];
|
188
|
+
}
|
189
|
+
}
|
190
|
+
}
|
191
|
+
return matches;
|
192
|
+
}
|
193
|
+
|
194
|
+
/* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
|
195
|
+
length of max_length and stores the position cur_ix in the hash table.
|
196
|
+
|
197
|
+
Sets *num_matches to the number of matches found, and stores the found
|
198
|
+
matches in matches[0] to matches[*num_matches - 1]. The matches will be
|
199
|
+
sorted by strictly increasing length and (non-strictly) increasing
|
200
|
+
distance. */
|
201
|
+
static BROTLI_INLINE size_t FN(FindAllMatches)(
|
202
|
+
HashToBinaryTree* BROTLI_RESTRICT self,
|
203
|
+
const BrotliEncoderDictionary* dictionary,
|
204
|
+
const uint8_t* BROTLI_RESTRICT data,
|
205
|
+
const size_t ring_buffer_mask, const size_t cur_ix,
|
206
|
+
const size_t max_length, const size_t max_backward,
|
207
|
+
const size_t dictionary_distance, const BrotliEncoderParams* params,
|
208
|
+
BackwardMatch* matches) {
|
209
|
+
BackwardMatch* const orig_matches = matches;
|
210
|
+
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
211
|
+
size_t best_len = 1;
|
212
|
+
const size_t short_match_max_backward =
|
213
|
+
params->quality != HQ_ZOPFLIFICATION_QUALITY ? 16 : 64;
|
214
|
+
size_t stop = cur_ix - short_match_max_backward;
|
215
|
+
uint32_t dict_matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
|
216
|
+
size_t i;
|
217
|
+
if (cur_ix < short_match_max_backward) { stop = 0; }
|
218
|
+
for (i = cur_ix - 1; i > stop && best_len <= 2; --i) {
|
219
|
+
size_t prev_ix = i;
|
220
|
+
const size_t backward = cur_ix - prev_ix;
|
221
|
+
if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
|
222
|
+
break;
|
223
|
+
}
|
224
|
+
prev_ix &= ring_buffer_mask;
|
225
|
+
if (data[cur_ix_masked] != data[prev_ix] ||
|
226
|
+
data[cur_ix_masked + 1] != data[prev_ix + 1]) {
|
227
|
+
continue;
|
228
|
+
}
|
229
|
+
{
|
230
|
+
const size_t len =
|
231
|
+
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
232
|
+
max_length);
|
233
|
+
if (len > best_len) {
|
234
|
+
best_len = len;
|
235
|
+
InitBackwardMatch(matches++, backward, len);
|
236
|
+
}
|
237
|
+
}
|
238
|
+
}
|
239
|
+
if (best_len < max_length) {
|
240
|
+
matches = FN(StoreAndFindMatches)(self, data, cur_ix,
|
241
|
+
ring_buffer_mask, max_length, max_backward, &best_len, matches);
|
242
|
+
}
|
243
|
+
for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) {
|
244
|
+
dict_matches[i] = kInvalidMatch;
|
245
|
+
}
|
246
|
+
{
|
247
|
+
size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1);
|
248
|
+
if (BrotliFindAllStaticDictionaryMatches(dictionary,
|
249
|
+
&data[cur_ix_masked], minlen, max_length, &dict_matches[0])) {
|
250
|
+
size_t maxlen = BROTLI_MIN(
|
251
|
+
size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length);
|
252
|
+
size_t l;
|
253
|
+
for (l = minlen; l <= maxlen; ++l) {
|
254
|
+
uint32_t dict_id = dict_matches[l];
|
255
|
+
if (dict_id < kInvalidMatch) {
|
256
|
+
size_t distance = dictionary_distance + (dict_id >> 5) + 1;
|
257
|
+
if (distance <= params->dist.max_distance) {
|
258
|
+
InitDictionaryBackwardMatch(matches++, distance, l, dict_id & 31);
|
259
|
+
}
|
260
|
+
}
|
261
|
+
}
|
262
|
+
}
|
263
|
+
}
|
264
|
+
return (size_t)(matches - orig_matches);
|
265
|
+
}
|
266
|
+
|
267
|
+
/* Stores the hash of the next 4 bytes and re-roots the binary tree at the
|
268
|
+
current sequence, without returning any matches.
|
269
|
+
REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */
|
270
|
+
static BROTLI_INLINE void FN(Store)(HashToBinaryTree* BROTLI_RESTRICT self,
|
271
|
+
const uint8_t* BROTLI_RESTRICT data,
|
272
|
+
const size_t mask, const size_t ix) {
|
273
|
+
/* Maximum distance is window size - 16, see section 9.1. of the spec. */
|
274
|
+
const size_t max_backward = self->window_mask_ - BROTLI_WINDOW_GAP + 1;
|
275
|
+
FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH,
|
276
|
+
max_backward, NULL, NULL);
|
277
|
+
}
|
278
|
+
|
279
|
+
static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* BROTLI_RESTRICT self,
|
280
|
+
const uint8_t* BROTLI_RESTRICT data, const size_t mask,
|
281
|
+
const size_t ix_start, const size_t ix_end) {
|
282
|
+
size_t i = ix_start;
|
283
|
+
size_t j = ix_start;
|
284
|
+
if (ix_start + 63 <= ix_end) {
|
285
|
+
i = ix_end - 63;
|
286
|
+
}
|
287
|
+
if (ix_start + 512 <= i) {
|
288
|
+
for (; j < i; j += 8) {
|
289
|
+
FN(Store)(self, data, mask, j);
|
290
|
+
}
|
291
|
+
}
|
292
|
+
for (; i < ix_end; ++i) {
|
293
|
+
FN(Store)(self, data, mask, i);
|
294
|
+
}
|
295
|
+
}
|
296
|
+
|
297
|
+
static BROTLI_INLINE void FN(StitchToPreviousBlock)(
|
298
|
+
HashToBinaryTree* BROTLI_RESTRICT self,
|
299
|
+
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
|
300
|
+
size_t ringbuffer_mask) {
|
301
|
+
if (num_bytes >= FN(HashTypeLength)() - 1 &&
|
302
|
+
position >= MAX_TREE_COMP_LENGTH) {
|
303
|
+
/* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher.
|
304
|
+
These could not be calculated before, since they require knowledge
|
305
|
+
of both the previous and the current block. */
|
306
|
+
const size_t i_start = position - MAX_TREE_COMP_LENGTH + 1;
|
307
|
+
const size_t i_end = BROTLI_MIN(size_t, position, i_start + num_bytes);
|
308
|
+
size_t i;
|
309
|
+
for (i = i_start; i < i_end; ++i) {
|
310
|
+
/* Maximum distance is window size - 16, see section 9.1. of the spec.
|
311
|
+
Furthermore, we have to make sure that we don't look further back
|
312
|
+
from the start of the next block than the window size, otherwise we
|
313
|
+
could access already overwritten areas of the ring-buffer. */
|
314
|
+
const size_t max_backward =
|
315
|
+
self->window_mask_ - BROTLI_MAX(size_t,
|
316
|
+
BROTLI_WINDOW_GAP - 1,
|
317
|
+
position - i);
|
318
|
+
/* We know that i + MAX_TREE_COMP_LENGTH <= position + num_bytes, i.e. the
|
319
|
+
end of the current block and that we have at least
|
320
|
+
MAX_TREE_COMP_LENGTH tail in the ring-buffer. */
|
321
|
+
FN(StoreAndFindMatches)(self, ringbuffer, i, ringbuffer_mask,
|
322
|
+
MAX_TREE_COMP_LENGTH, max_backward, NULL, NULL);
|
323
|
+
}
|
324
|
+
}
|
325
|
+
}
|
326
|
+
|
327
|
+
#undef BUCKET_SIZE
|
328
|
+
|
329
|
+
#undef HashToBinaryTree
|
@@ -0,0 +1,100 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Build per-context histograms of literals, commands and distance codes. */
|
8
|
+
|
9
|
+
#include "brotli_enc_histogram.h"
|
10
|
+
|
11
|
+
#include "brotli_common_context.h"
|
12
|
+
#include "brotli_enc_block_splitter.h"
|
13
|
+
#include "brotli_enc_command.h"
|
14
|
+
|
15
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
16
|
+
extern "C" {
|
17
|
+
#endif
|
18
|
+
|
19
|
+
typedef struct BlockSplitIterator {
|
20
|
+
const BlockSplit* split_; /* Not owned. */
|
21
|
+
size_t idx_;
|
22
|
+
size_t type_;
|
23
|
+
size_t length_;
|
24
|
+
} BlockSplitIterator;
|
25
|
+
|
26
|
+
static void InitBlockSplitIterator(BlockSplitIterator* self,
|
27
|
+
const BlockSplit* split) {
|
28
|
+
self->split_ = split;
|
29
|
+
self->idx_ = 0;
|
30
|
+
self->type_ = 0;
|
31
|
+
self->length_ = split->lengths ? split->lengths[0] : 0;
|
32
|
+
}
|
33
|
+
|
34
|
+
static void BlockSplitIteratorNext(BlockSplitIterator* self) {
|
35
|
+
if (self->length_ == 0) {
|
36
|
+
++self->idx_;
|
37
|
+
self->type_ = self->split_->types[self->idx_];
|
38
|
+
self->length_ = self->split_->lengths[self->idx_];
|
39
|
+
}
|
40
|
+
--self->length_;
|
41
|
+
}
|
42
|
+
|
43
|
+
void BrotliBuildHistogramsWithContext(
|
44
|
+
const Command* cmds, const size_t num_commands,
|
45
|
+
const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
|
46
|
+
const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t start_pos,
|
47
|
+
size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
|
48
|
+
const ContextType* context_modes, HistogramLiteral* literal_histograms,
|
49
|
+
HistogramCommand* insert_and_copy_histograms,
|
50
|
+
HistogramDistance* copy_dist_histograms) {
|
51
|
+
size_t pos = start_pos;
|
52
|
+
BlockSplitIterator literal_it;
|
53
|
+
BlockSplitIterator insert_and_copy_it;
|
54
|
+
BlockSplitIterator dist_it;
|
55
|
+
size_t i;
|
56
|
+
|
57
|
+
InitBlockSplitIterator(&literal_it, literal_split);
|
58
|
+
InitBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split);
|
59
|
+
InitBlockSplitIterator(&dist_it, dist_split);
|
60
|
+
for (i = 0; i < num_commands; ++i) {
|
61
|
+
const Command* cmd = &cmds[i];
|
62
|
+
size_t j;
|
63
|
+
BlockSplitIteratorNext(&insert_and_copy_it);
|
64
|
+
HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_],
|
65
|
+
cmd->cmd_prefix_);
|
66
|
+
/* TODO: unwrap iterator blocks. */
|
67
|
+
for (j = cmd->insert_len_; j != 0; --j) {
|
68
|
+
size_t context;
|
69
|
+
BlockSplitIteratorNext(&literal_it);
|
70
|
+
context = literal_it.type_;
|
71
|
+
if (context_modes) {
|
72
|
+
ContextLut lut = BROTLI_CONTEXT_LUT(context_modes[context]);
|
73
|
+
context = (context << BROTLI_LITERAL_CONTEXT_BITS) +
|
74
|
+
BROTLI_CONTEXT(prev_byte, prev_byte2, lut);
|
75
|
+
}
|
76
|
+
HistogramAddLiteral(&literal_histograms[context],
|
77
|
+
ringbuffer[pos & mask]);
|
78
|
+
prev_byte2 = prev_byte;
|
79
|
+
prev_byte = ringbuffer[pos & mask];
|
80
|
+
++pos;
|
81
|
+
}
|
82
|
+
pos += CommandCopyLen(cmd);
|
83
|
+
if (CommandCopyLen(cmd)) {
|
84
|
+
prev_byte2 = ringbuffer[(pos - 2) & mask];
|
85
|
+
prev_byte = ringbuffer[(pos - 1) & mask];
|
86
|
+
if (cmd->cmd_prefix_ >= 128) {
|
87
|
+
size_t context;
|
88
|
+
BlockSplitIteratorNext(&dist_it);
|
89
|
+
context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) +
|
90
|
+
CommandDistanceContext(cmd);
|
91
|
+
HistogramAddDistance(©_dist_histograms[context],
|
92
|
+
cmd->dist_prefix_ & 0x3FF);
|
93
|
+
}
|
94
|
+
}
|
95
|
+
}
|
96
|
+
}
|
97
|
+
|
98
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
99
|
+
} /* extern "C" */
|
100
|
+
#endif
|
@@ -0,0 +1,63 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Models the histograms of literals, commands and distance codes. */
|
8
|
+
|
9
|
+
#ifndef BROTLI_ENC_HISTOGRAM_H_
|
10
|
+
#define BROTLI_ENC_HISTOGRAM_H_
|
11
|
+
|
12
|
+
#include <string.h> /* memset */
|
13
|
+
|
14
|
+
#include "brotli_common_constants.h"
|
15
|
+
#include "brotli_common_context.h"
|
16
|
+
#include "brotli_common_platform.h"
|
17
|
+
#include "brotli_types.h"
|
18
|
+
#include "brotli_enc_block_splitter.h"
|
19
|
+
#include "brotli_enc_command.h"
|
20
|
+
|
21
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
22
|
+
extern "C" {
|
23
|
+
#endif
|
24
|
+
|
25
|
+
/* The distance symbols effectively used by "Large Window Brotli" (32-bit). */
|
26
|
+
#define BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS 544
|
27
|
+
|
28
|
+
#define FN(X) X ## Literal
|
29
|
+
#define DATA_SIZE BROTLI_NUM_LITERAL_SYMBOLS
|
30
|
+
#define DataType uint8_t
|
31
|
+
#include "brotli_enc_histogram_inc.h" /* NOLINT(build/include) */
|
32
|
+
#undef DataType
|
33
|
+
#undef DATA_SIZE
|
34
|
+
#undef FN
|
35
|
+
|
36
|
+
#define FN(X) X ## Command
|
37
|
+
/* DataType is deliberately not #undef'd after the Command instantiation: the
   Distance instantiation below reuses this uint16_t definition and performs
   the #undef itself. */
#define DataType uint16_t
|
38
|
+
#define DATA_SIZE BROTLI_NUM_COMMAND_SYMBOLS
|
39
|
+
#include "brotli_enc_histogram_inc.h" /* NOLINT(build/include) */
|
40
|
+
#undef DATA_SIZE
|
41
|
+
#undef FN
|
42
|
+
|
43
|
+
#define FN(X) X ## Distance
|
44
|
+
#define DATA_SIZE BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS
|
45
|
+
#include "brotli_enc_histogram_inc.h" /* NOLINT(build/include) */
|
46
|
+
#undef DataType
|
47
|
+
#undef DATA_SIZE
|
48
|
+
#undef FN
|
49
|
+
|
50
|
+
BROTLI_INTERNAL void BrotliBuildHistogramsWithContext(
|
51
|
+
const Command* cmds, const size_t num_commands,
|
52
|
+
const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
|
53
|
+
const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t pos,
|
54
|
+
size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
|
55
|
+
const ContextType* context_modes, HistogramLiteral* literal_histograms,
|
56
|
+
HistogramCommand* insert_and_copy_histograms,
|
57
|
+
HistogramDistance* copy_dist_histograms);
|
58
|
+
|
59
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
60
|
+
} /* extern "C" */
|
61
|
+
#endif
|
62
|
+
|
63
|
+
#endif /* BROTLI_ENC_HISTOGRAM_H_ */
|