isomorfeus-ferret 0.12.6 → 0.13.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +101 -19
- data/README.md +85 -16
- data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
- data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
- data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
- data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
- data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
- data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
- data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
- data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
- data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
- data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
- data/ext/isomorfeus_ferret_ext/bzlib_blocksort.c +1094 -0
- data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
- data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
- data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
- data/ext/isomorfeus_ferret_ext/bzlib_huffman.c +205 -0
- data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
- data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
- data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
- data/ext/isomorfeus_ferret_ext/frb_index.c +513 -464
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
- data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
- data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
- data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
- data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
- data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
- data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
- data/ext/isomorfeus_ferret_ext/frt_document.h +10 -9
- data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
- data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +91 -200
- data/ext/isomorfeus_ferret_ext/frt_global.h +7 -18
- data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
- data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_index.c +714 -384
- data/ext/isomorfeus_ferret_ext/frt_index.h +274 -290
- data/ext/isomorfeus_ferret_ext/frt_lang.c +0 -2
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +68 -91
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +46 -84
- data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
- data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
- data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
- data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
- data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +22 -112
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
- data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
- data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
- data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
- data/ext/isomorfeus_ferret_ext/test.c +0 -17
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
- data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
- data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
- data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_fields.c +111 -100
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
- data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
- data/ext/isomorfeus_ferret_ext/test_global.c +0 -46
- data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
- data/ext/isomorfeus_ferret_ext/test_index.c +373 -363
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
- data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -64
- data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
- data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
- data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
- data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +113 -58
- data/ext/isomorfeus_ferret_ext/email.rl +0 -21
- data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
- data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
- data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
- data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
- data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
- data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
- data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
- data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
- data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
- data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -0,0 +1,200 @@
|
|
1
|
+
/* Copyright 2016 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/**
|
8
|
+
* @file
|
9
|
+
* Common constants used in decoder and encoder API.
|
10
|
+
*/
|
11
|
+
|
12
|
+
#ifndef BROTLI_COMMON_CONSTANTS_H_
|
13
|
+
#define BROTLI_COMMON_CONSTANTS_H_
|
14
|
+
|
15
|
+
#include "brotli_common_platform.h"
|
16
|
+
#include "brotli_port.h"
|
17
|
+
#include "brotli_types.h"
|
18
|
+
|
19
|
+
/* Specification: 7.3. Encoding of the context map */
#define BROTLI_CONTEXT_MAP_MAX_RLE 16

/* Specification: 2. Compressed representation overview */
#define BROTLI_MAX_NUMBER_OF_BLOCK_TYPES 256

/* Specification: 3.3. Alphabet sizes: insert-and-copy length */
#define BROTLI_NUM_LITERAL_SYMBOLS 256
#define BROTLI_NUM_COMMAND_SYMBOLS 704
#define BROTLI_NUM_BLOCK_LEN_SYMBOLS 26
#define BROTLI_MAX_CONTEXT_MAP_SYMBOLS (BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + \
                                        BROTLI_CONTEXT_MAP_MAX_RLE)
#define BROTLI_MAX_BLOCK_TYPE_SYMBOLS (BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 2)

/* Specification: 3.5. Complex prefix codes */
#define BROTLI_REPEAT_PREVIOUS_CODE_LENGTH 16
#define BROTLI_REPEAT_ZERO_CODE_LENGTH 17
#define BROTLI_CODE_LENGTH_CODES (BROTLI_REPEAT_ZERO_CODE_LENGTH + 1)
/* "code length of 8 is repeated" */
#define BROTLI_INITIAL_REPEATED_CODE_LENGTH 8

/* "Large Window Brotli" */

/**
 * The theoretical maximum number of distance bits specified for large window
 * brotli, for 64-bit encoders and decoders. Even when in practice 32-bit
 * encoders and decoders only support up to 30 max distance bits, the value is
 * set to 62 because it affects the large window brotli file format.
 * Specifically, it affects the encoding of simple huffman tree for distances,
 * see Specification RFC 7932 chapter 3.4.
 */
#define BROTLI_LARGE_MAX_DISTANCE_BITS 62U
#define BROTLI_LARGE_MIN_WBITS 10
/**
 * The maximum supported large brotli window bits by the encoder and decoder.
 * Large window brotli allows up to 62 bits, however the current encoder and
 * decoder, designed for 32-bit integers, only support up to 30 bits maximum.
 */
#define BROTLI_LARGE_MAX_WBITS 30

/* Specification: 4. Encoding of distances */
#define BROTLI_NUM_DISTANCE_SHORT_CODES 16
/**
 * Maximal number of "postfix" bits.
 *
 * Number of "postfix" bits is stored as 2 bits in meta-block header.
 */
#define BROTLI_MAX_NPOSTFIX 3
#define BROTLI_MAX_NDIRECT 120
#define BROTLI_MAX_DISTANCE_BITS 24U
/**
 * Size of the distance alphabet for the given parameters.
 *
 * Parameter order is (NPOSTFIX, NDIRECT, MAXNBITS): the short codes, plus
 * NDIRECT direct codes, plus MAXNBITS << (NPOSTFIX + 1) remaining codes.
 */
#define BROTLI_DISTANCE_ALPHABET_SIZE(NPOSTFIX, NDIRECT, MAXNBITS) ( \
    BROTLI_NUM_DISTANCE_SHORT_CODES + (NDIRECT) +                    \
    ((MAXNBITS) << ((NPOSTFIX) + 1)))
/* BROTLI_NUM_DISTANCE_SYMBOLS == 1128, i.e. 16 + 120 + (62 << 4).
   The arguments must follow the (NPOSTFIX, NDIRECT, MAXNBITS) order; passing
   NDIRECT first would turn the shift count into 121, which is undefined. */
#define BROTLI_NUM_DISTANCE_SYMBOLS \
    BROTLI_DISTANCE_ALPHABET_SIZE(  \
        BROTLI_MAX_NPOSTFIX, BROTLI_MAX_NDIRECT, BROTLI_LARGE_MAX_DISTANCE_BITS)

/* ((1 << 26) - 4) is the maximal distance that can be expressed in RFC 7932
   brotli stream using NPOSTFIX = 0 and NDIRECT = 0. With other NPOSTFIX and
   NDIRECT values distances up to ((1 << 29) + 88) could be expressed. */
#define BROTLI_MAX_DISTANCE 0x3FFFFFC

/* ((1 << 31) - 4) is the safe distance limit. Using this number as a limit
   allows safe distance calculation without overflows, given the distance
   alphabet size is limited to corresponding size
   (see kLargeWindowDistanceCodeLimits). */
#define BROTLI_MAX_ALLOWED_DISTANCE 0x7FFFFFFC


/* Specification: 5. Encoding of Literal Insertion Lengths and Copy Lengths */
#define BROTLI_NUM_INS_COPY_CODES 24

/* 7.1. Context modes and context ID lookup for literals */
/* "context IDs for literals are in the range of 0..63" */
#define BROTLI_LITERAL_CONTEXT_BITS 6

/* 7.2. Context ID for distances */
#define BROTLI_DISTANCE_CONTEXT_BITS 2

/* 9.1. Format of the Stream Header */
/* Number of slack bytes for window size. Don't confuse
   with BROTLI_NUM_DISTANCE_SHORT_CODES. */
#define BROTLI_WINDOW_GAP 16
#define BROTLI_MAX_BACKWARD_LIMIT(W) (((size_t)1 << (W)) - BROTLI_WINDOW_GAP)
|
104
|
+
|
105
|
+
/* Pair of limits filled in by BrotliCalculateDistanceCodeLimit. */
struct BrotliDistanceCodeLimit {
  /* Size of the distance alphabet. */
  uint32_t max_alphabet_size;
  /* Maximal distance representable by that alphabet. */
  uint32_t max_distance;
};
typedef struct BrotliDistanceCodeLimit BrotliDistanceCodeLimit;
|
109
|
+
|
110
|
+
/* This function calculates maximal size of distance alphabet, such that the
|
111
|
+
distances greater than the given values can not be represented.
|
112
|
+
|
113
|
+
This limits are designed to support fast and safe 32-bit decoders.
|
114
|
+
"32-bit" means that signed integer values up to ((1 << 31) - 1) could be
|
115
|
+
safely expressed.
|
116
|
+
|
117
|
+
Brotli distance alphabet symbols do not represent consecutive distance
|
118
|
+
ranges. Each distance alphabet symbol (excluding direct distances and short
|
119
|
+
codes), represent interleaved (for NPOSTFIX > 0) range of distances.
|
120
|
+
A "group" of consecutive (1 << NPOSTFIX) symbols represent non-interleaved
|
121
|
+
range. Two consecutive groups require the same amount of "extra bits".
|
122
|
+
|
123
|
+
It is important that distance alphabet represents complete "groups".
|
124
|
+
To avoid complex logic on encoder side about interleaved ranges
|
125
|
+
it was decided to restrict both sides to complete distance code "groups".
|
126
|
+
*/
|
127
|
+
BROTLI_UNUSED_FUNCTION BrotliDistanceCodeLimit BrotliCalculateDistanceCodeLimit(
|
128
|
+
uint32_t max_distance, uint32_t npostfix, uint32_t ndirect) {
|
129
|
+
BrotliDistanceCodeLimit result;
|
130
|
+
/* Marking this function as unused, because not all files
|
131
|
+
including "constants.h" use it -> compiler warns about that. */
|
132
|
+
BROTLI_UNUSED(&BrotliCalculateDistanceCodeLimit);
|
133
|
+
if (max_distance <= ndirect) {
|
134
|
+
/* This case never happens / exists only for the sake of completeness. */
|
135
|
+
result.max_alphabet_size = max_distance + BROTLI_NUM_DISTANCE_SHORT_CODES;
|
136
|
+
result.max_distance = max_distance;
|
137
|
+
return result;
|
138
|
+
} else {
|
139
|
+
/* The first prohibited value. */
|
140
|
+
uint32_t forbidden_distance = max_distance + 1;
|
141
|
+
/* Subtract "directly" encoded region. */
|
142
|
+
uint32_t offset = forbidden_distance - ndirect - 1;
|
143
|
+
uint32_t ndistbits = 0;
|
144
|
+
uint32_t tmp;
|
145
|
+
uint32_t half;
|
146
|
+
uint32_t group;
|
147
|
+
/* Postfix for the last dcode in the group. */
|
148
|
+
uint32_t postfix = (1u << npostfix) - 1;
|
149
|
+
uint32_t extra;
|
150
|
+
uint32_t start;
|
151
|
+
/* Remove postfix and "head-start". */
|
152
|
+
offset = (offset >> npostfix) + 4;
|
153
|
+
/* Calculate the number of distance bits. */
|
154
|
+
tmp = offset / 2;
|
155
|
+
/* Poor-man's log2floor, to avoid extra dependencies. */
|
156
|
+
while (tmp != 0) {ndistbits++; tmp = tmp >> 1;}
|
157
|
+
/* One bit is covered with subrange addressing ("half"). */
|
158
|
+
ndistbits--;
|
159
|
+
/* Find subrange. */
|
160
|
+
half = (offset >> ndistbits) & 1;
|
161
|
+
/* Calculate the "group" part of dcode. */
|
162
|
+
group = ((ndistbits - 1) << 1) | half;
|
163
|
+
/* Calculated "group" covers the prohibited distance value. */
|
164
|
+
if (group == 0) {
|
165
|
+
/* This case is added for correctness; does not occur for limit > 128. */
|
166
|
+
result.max_alphabet_size = ndirect + BROTLI_NUM_DISTANCE_SHORT_CODES;
|
167
|
+
result.max_distance = ndirect;
|
168
|
+
return result;
|
169
|
+
}
|
170
|
+
/* Decrement "group", so it is the last permitted "group". */
|
171
|
+
group--;
|
172
|
+
/* After group was decremented, ndistbits and half must be recalculated. */
|
173
|
+
ndistbits = (group >> 1) + 1;
|
174
|
+
/* The last available distance in the subrange has all extra bits set. */
|
175
|
+
extra = (1u << ndistbits) - 1;
|
176
|
+
/* Calculate region start. NB: ndistbits >= 1. */
|
177
|
+
start = (1u << (ndistbits + 1)) - 4;
|
178
|
+
/* Move to subregion. */
|
179
|
+
start += (group & 1) << ndistbits;
|
180
|
+
/* Calculate the alphabet size. */
|
181
|
+
result.max_alphabet_size = ((group << npostfix) | postfix) + ndirect +
|
182
|
+
BROTLI_NUM_DISTANCE_SHORT_CODES + 1;
|
183
|
+
/* Calculate the maximal distance representable by alphabet. */
|
184
|
+
result.max_distance = ((start + extra) << npostfix) + postfix + ndirect + 1;
|
185
|
+
return result;
|
186
|
+
}
|
187
|
+
}
|
188
|
+
|
189
|
+
/* Half-open range of values that belong to one prefix code:
   [offset, offset + 2^nbits). */
typedef struct {
  uint16_t offset;  /* First value of the range. */
  uint8_t nbits;    /* The range holds 2^nbits values. */
} BrotliPrefixCodeRange;
|
195
|
+
|
196
|
+
/* "Soft-private", it is exported, but not "advertised" as API. */
|
197
|
+
BROTLI_COMMON_API extern const BrotliPrefixCodeRange
|
198
|
+
_kBrotliPrefixCodeRanges[BROTLI_NUM_BLOCK_LEN_SYMBOLS];
|
199
|
+
|
200
|
+
#endif /* BROTLI_COMMON_CONSTANTS_H_ */
|
@@ -0,0 +1,156 @@
|
|
1
|
+
#include "brotli_common_context.h"
|
2
|
+
|
3
|
+
#include "brotli_types.h"
|
4
|
+
|
5
|
+
/* Common context lookup table for all context modes. */
|
6
|
+
const uint8_t _kBrotliContextLookupTable[2048] = {
|
7
|
+
/* CONTEXT_LSB6, last byte. */
|
8
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
9
|
+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
10
|
+
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
11
|
+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
12
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
13
|
+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
14
|
+
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
15
|
+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
16
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
17
|
+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
18
|
+
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
19
|
+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
20
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
21
|
+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
22
|
+
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
23
|
+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
24
|
+
|
25
|
+
/* CONTEXT_LSB6, second last byte. */
|
26
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
27
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
28
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
29
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
30
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
31
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
32
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
33
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
34
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
35
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
36
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
37
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
38
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
39
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
40
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
41
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
42
|
+
|
43
|
+
/* CONTEXT_MSB6, last byte. */
|
44
|
+
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
|
45
|
+
4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
|
46
|
+
8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11,
|
47
|
+
12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
|
48
|
+
16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
|
49
|
+
20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
|
50
|
+
24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
|
51
|
+
28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
|
52
|
+
32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
|
53
|
+
36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
|
54
|
+
40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
|
55
|
+
44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
|
56
|
+
48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
|
57
|
+
52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
|
58
|
+
56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
|
59
|
+
60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,
|
60
|
+
|
61
|
+
/* CONTEXT_MSB6, second last byte. */
|
62
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
63
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
64
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
65
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
66
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
67
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
68
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
69
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
70
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
71
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
72
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
73
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
74
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
75
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
76
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
77
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
78
|
+
|
79
|
+
/* CONTEXT_UTF8, last byte. */
|
80
|
+
/* ASCII range. */
|
81
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
|
82
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
83
|
+
8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
|
84
|
+
44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
|
85
|
+
12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
|
86
|
+
52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
|
87
|
+
12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
|
88
|
+
60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
|
89
|
+
/* UTF8 continuation byte range. */
|
90
|
+
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
91
|
+
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
92
|
+
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
93
|
+
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
94
|
+
/* UTF8 lead byte range. */
|
95
|
+
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
96
|
+
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
97
|
+
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
98
|
+
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
99
|
+
|
100
|
+
/* CONTEXT_UTF8 second last byte. */
|
101
|
+
/* ASCII range. */
|
102
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
103
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
104
|
+
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
105
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
|
106
|
+
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
107
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
|
108
|
+
1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
109
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
|
110
|
+
/* UTF8 continuation byte range. */
|
111
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
112
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
113
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
114
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
115
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
116
|
+
/* UTF8 lead bytes 0xD0..0xFF (lead bytes 0xC0..0xCF are the row above). */
|
117
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
118
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
119
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
120
|
+
|
121
|
+
/* CONTEXT_SIGNED, last byte: the second last byte values (below) << 3. */
|
122
|
+
0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
123
|
+
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
124
|
+
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
125
|
+
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
126
|
+
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
127
|
+
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
128
|
+
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
129
|
+
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
130
|
+
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
131
|
+
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
132
|
+
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
133
|
+
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
134
|
+
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
|
135
|
+
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
|
136
|
+
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
|
137
|
+
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56,
|
138
|
+
|
139
|
+
/* CONTEXT_SIGNED, second last byte. */
|
140
|
+
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
141
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
142
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
143
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
144
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
145
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
146
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
147
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
148
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
149
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
150
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
151
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
152
|
+
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
153
|
+
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
154
|
+
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
155
|
+
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
|
156
|
+
};
|
@@ -0,0 +1,113 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Lookup table to map the previous two bytes to a context id.
|
8
|
+
|
9
|
+
There are four different context modeling modes defined here:
|
10
|
+
CONTEXT_LSB6: context id is the least significant 6 bits of the last byte,
|
11
|
+
CONTEXT_MSB6: context id is the most significant 6 bits of the last byte,
|
12
|
+
CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text,
|
13
|
+
CONTEXT_SIGNED: second-order context model tuned for signed integers.
|
14
|
+
|
15
|
+
If |p1| and |p2| are the previous two bytes, and |mode| is the current context
|
16
|
+
mode, we calculate the context as:
|
17
|
+
|
18
|
+
context = ContextLut(mode)[p1] | ContextLut(mode)[p2 + 256].
|
19
|
+
|
20
|
+
For CONTEXT_UTF8 mode, if the previous two bytes are ASCII characters
|
21
|
+
(i.e. < 128), this will be equivalent to
|
22
|
+
|
23
|
+
context = 4 * context1(p1) + context2(p2),
|
24
|
+
|
25
|
+
where context1 is based on the previous byte in the following way:
|
26
|
+
|
27
|
+
0 : non-ASCII control
|
28
|
+
1 : \t, \n, \r
|
29
|
+
2 : space
|
30
|
+
3 : other punctuation
|
31
|
+
4 : " '
|
32
|
+
5 : %
|
33
|
+
6 : ( < [ {
|
34
|
+
7 : ) > ] }
|
35
|
+
8 : , ; :
|
36
|
+
9 : .
|
37
|
+
10 : =
|
38
|
+
11 : number
|
39
|
+
12 : upper-case vowel
|
40
|
+
13 : upper-case consonant
|
41
|
+
14 : lower-case vowel
|
42
|
+
15 : lower-case consonant
|
43
|
+
|
44
|
+
and context2 is based on the second last byte:
|
45
|
+
|
46
|
+
0 : control, space
|
47
|
+
1 : punctuation
|
48
|
+
2 : upper-case letter, number
|
49
|
+
3 : lower-case letter
|
50
|
+
|
51
|
+
If the last byte is ASCII, and the second last byte is not (in a valid UTF8
|
52
|
+
stream it will be a continuation byte, value between 128 and 191), the
|
53
|
+
context is the same as if the second last byte was an ASCII control or space.
|
54
|
+
|
55
|
+
If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
|
56
|
+
be a continuation byte and the context id is 2 or 3 depending on the LSB of
|
57
|
+
the last byte and to a lesser extent on the second last byte if it is ASCII.
|
58
|
+
|
59
|
+
If the last byte is a UTF8 continuation byte, the second last byte can be:
|
60
|
+
- continuation byte: the next byte is probably ASCII or lead byte (assuming
|
61
|
+
4-byte UTF8 characters are rare) and the context id is 0 or 1.
|
62
|
+
- lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
|
63
|
+
- lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
|
64
|
+
|
65
|
+
The possible value combinations of the previous two bytes, the range of
|
66
|
+
context ids and the type of the next byte are summarized in the table below:
|
67
|
+
|
68
|
+
|--------\-----------------------------------------------------------------|
|
69
|
+
|         \                         Last byte                              |
|
70
|
+
| Second   \---------------------------------------------------------------|
|
71
|
+
| last byte \    ASCII            |   cont. byte        |   lead byte      |
|
72
|
+
|            \   (0-127)          |   (128-191)         |   (192-)         |
|
73
|
+
|=============|===================|=====================|==================|
|
74
|
+
|  ASCII      | next: ASCII/lead  |  not valid          |  next: cont.     |
|
75
|
+
|  (0-127)    | context: 4 - 63   |                     |  context: 2 - 3  |
|
76
|
+
|-------------|-------------------|---------------------|------------------|
|
77
|
+
|  cont. byte | next: ASCII/lead  |  next: ASCII/lead   |  next: cont.     |
|
78
|
+
|  (128-191)  | context: 4 - 63   |  context: 0 - 1     |  context: 2 - 3  |
|
79
|
+
|-------------|-------------------|---------------------|------------------|
|
80
|
+
|  lead byte  | not valid         |  next: ASCII/lead   |  not valid       |
|
81
|
+
|  (192-207)  |                   |  context: 0 - 1     |                  |
|
82
|
+
|-------------|-------------------|---------------------|------------------|
|
83
|
+
|  lead byte  | not valid         |  next: cont.        |  not valid       |
|
84
|
+
|  (208-)     |                   |  context: 2 - 3     |                  |
|
85
|
+
|-------------|-------------------|---------------------|------------------|
|
86
|
+
*/
|
87
|
+
|
88
|
+
#ifndef BROTLI_COMMON_CONTEXT_H_
|
89
|
+
#define BROTLI_COMMON_CONTEXT_H_
|
90
|
+
|
91
|
+
#include "brotli_port.h"
|
92
|
+
#include "brotli_types.h"
|
93
|
+
|
94
|
+
typedef enum ContextType {  /* Context modeling mode; passed as MODE to BROTLI_CONTEXT_LUT. */
|
95
|
+
CONTEXT_LSB6 = 0,  /* Context id is the least significant 6 bits of the last byte. */
|
96
|
+
CONTEXT_MSB6 = 1,  /* Context id is the most significant 6 bits of the last byte. */
|
97
|
+
CONTEXT_UTF8 = 2,  /* Second-order context model tuned for UTF8-encoded text. */
|
98
|
+
CONTEXT_SIGNED = 3  /* Second-order context model tuned for signed integers. */
|
99
|
+
} ContextType;
|
100
|
+
|
101
|
+
/* "Soft-private", it is exported, but not "advertised" as API. */
|
102
|
+
/* Common context lookup table for all context modes: 2048 entries, addressed as one 512-entry slice per ContextType (see BROTLI_CONTEXT_LUT). */
|
103
|
+
BROTLI_COMMON_API extern const uint8_t _kBrotliContextLookupTable[2048];
|
104
|
+
|
105
|
+
typedef const uint8_t* ContextLut;  /* Pointer to one mode's slice of _kBrotliContextLookupTable, as returned by BROTLI_CONTEXT_LUT. */
|
106
|
+
|
107
|
+
/* typeof(MODE) == ContextType; returns ContextLut: the address of entry ((MODE) << 9), i.e. the start of that mode's 512-entry slice of the 2048-entry table. */
|
108
|
+
#define BROTLI_CONTEXT_LUT(MODE) (&_kBrotliContextLookupTable[(MODE) << 9])
|
109
|
+
|
110
|
+
/* typeof(LUT) == ContextLut. ORs the entry for P1 in the first 256 slots of LUT with the entry for P2 in the following 256 slots; P1 and P2 are the previous two bytes. LUT is expanded twice, so pass an expression without side effects. */
|
111
|
+
#define BROTLI_CONTEXT(P1, P2, LUT) ((LUT)[P1] | ((LUT) + 256)[P2])
|
112
|
+
|
113
|
+
#endif /* BROTLI_COMMON_CONTEXT_H_ */
|