isomorfeus-ferret 0.12.6 → 0.13.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +101 -19
- data/README.md +85 -16
- data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
- data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
- data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
- data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
- data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
- data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
- data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
- data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
- data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
- data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
- data/ext/isomorfeus_ferret_ext/bzlib_blocksort.c +1094 -0
- data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
- data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
- data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
- data/ext/isomorfeus_ferret_ext/bzlib_huffman.c +205 -0
- data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
- data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
- data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
- data/ext/isomorfeus_ferret_ext/frb_index.c +513 -464
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
- data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
- data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
- data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
- data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
- data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
- data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
- data/ext/isomorfeus_ferret_ext/frt_document.h +10 -9
- data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
- data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +91 -200
- data/ext/isomorfeus_ferret_ext/frt_global.h +7 -18
- data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
- data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_index.c +714 -384
- data/ext/isomorfeus_ferret_ext/frt_index.h +274 -290
- data/ext/isomorfeus_ferret_ext/frt_lang.c +0 -2
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +68 -91
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +46 -84
- data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
- data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
- data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
- data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
- data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +22 -112
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
- data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
- data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
- data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
- data/ext/isomorfeus_ferret_ext/test.c +0 -17
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
- data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
- data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
- data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_fields.c +111 -100
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
- data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
- data/ext/isomorfeus_ferret_ext/test_global.c +0 -46
- data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
- data/ext/isomorfeus_ferret_ext/test_index.c +373 -363
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
- data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -64
- data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
- data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
- data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
- data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +113 -58
- data/ext/isomorfeus_ferret_ext/email.rl +0 -21
- data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
- data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
- data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
- data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
- data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
- data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
- data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
- data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
- data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
- data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -0,0 +1,85 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Heuristics for deciding about the UTF8-ness of strings. */
|
8
|
+
|
9
|
+
#include "brotli_enc_utf8_util.h"
|
10
|
+
|
11
|
+
#include "brotli_types.h"
|
12
|
+
|
13
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
14
|
+
extern "C" {
|
15
|
+
#endif
|
16
|
+
|
17
|
+
/* Decodes one UTF-8 sequence that starts at input[0].

   On success the decoded code point is stored in *symbol and the number of
   bytes consumed (1..4) is returned. A NUL byte, an overlong encoding, a
   sequence that does not fit into |size| bytes, a bad continuation byte or a
   value above 0x10FFFF is treated as "not UTF-8": *symbol is then set to
   0x110000 | input[0] (a marker above the UTF-8 code space) and 1 is
   returned, so the caller advances by a single byte.

   input[0] is read unconditionally, so |size| must be at least 1.
   Surrogate code points are not filtered out. */
static size_t BrotliParseAsUTF8(
    int* symbol, const uint8_t* input, size_t size) {
  const uint8_t lead = input[0];
  int code;

  if ((lead & 0x80) == 0) {
    /* ASCII; the NUL byte is not counted as text. */
    if (lead != 0) {
      *symbol = lead;
      return 1;
    }
  } else if ((lead & 0xE0) == 0xC0) {
    /* 2-byte UTF8 */
    if (size > 1u && (input[1] & 0xC0) == 0x80) {
      code = ((lead & 0x1F) << 6) | (input[1] & 0x3F);
      if (code > 0x7F) {  /* anything smaller is an overlong form */
        *symbol = code;
        return 2;
      }
    }
  } else if ((lead & 0xF0) == 0xE0) {
    /* 3-byte UTF8 */
    if (size > 2u &&
        (input[1] & 0xC0) == 0x80 &&
        (input[2] & 0xC0) == 0x80) {
      code = ((lead & 0x0F) << 12) |
             ((input[1] & 0x3F) << 6) |
             (input[2] & 0x3F);
      if (code > 0x7FF) {  /* anything smaller is an overlong form */
        *symbol = code;
        return 3;
      }
    }
  } else if ((lead & 0xF8) == 0xF0) {
    /* 4-byte UTF8 */
    if (size > 3u &&
        (input[1] & 0xC0) == 0x80 &&
        (input[2] & 0xC0) == 0x80 &&
        (input[3] & 0xC0) == 0x80) {
      code = ((lead & 0x07) << 18) |
             ((input[1] & 0x3F) << 12) |
             ((input[2] & 0x3F) << 6) |
             (input[3] & 0x3F);
      if (code > 0xFFFF && code <= 0x10FFFF) {
        *symbol = code;
        return 4;
      }
    }
  }

  /* Not UTF8, emit a special symbol above the UTF8-code space */
  *symbol = 0x110000 | lead;
  return 1;
}
|
66
|
+
|
67
|
+
/* Returns 1 if at least min_fraction of the data is UTF8-encoded.*/
|
68
|
+
BROTLI_BOOL BrotliIsMostlyUTF8(
|
69
|
+
const uint8_t* data, const size_t pos, const size_t mask,
|
70
|
+
const size_t length, const double min_fraction) {
|
71
|
+
size_t size_utf8 = 0;
|
72
|
+
size_t i = 0;
|
73
|
+
while (i < length) {
|
74
|
+
int symbol;
|
75
|
+
size_t bytes_read =
|
76
|
+
BrotliParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i);
|
77
|
+
i += bytes_read;
|
78
|
+
if (symbol < 0x110000) size_utf8 += bytes_read;
|
79
|
+
}
|
80
|
+
return TO_BROTLI_BOOL((double)size_utf8 > min_fraction * (double)length);
|
81
|
+
}
|
82
|
+
|
83
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
84
|
+
} /* extern "C" */
|
85
|
+
#endif
|
@@ -0,0 +1,32 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Heuristics for deciding about the UTF8-ness of strings. */
|
8
|
+
|
9
|
+
#ifndef BROTLI_ENC_UTF8_UTIL_H_
|
10
|
+
#define BROTLI_ENC_UTF8_UTIL_H_
|
11
|
+
|
12
|
+
#include "brotli_common_platform.h"
|
13
|
+
#include "brotli_types.h"
|
14
|
+
|
15
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
16
|
+
extern "C" {
|
17
|
+
#endif
|
18
|
+
|
19
|
+
/* UTF-8 ratio threshold. Presumably the min_fraction value callers pass to
   BrotliIsMostlyUTF8 -- TODO confirm at the call sites. */
static const double kMinUTF8Ratio = 0.75;
|
20
|
+
|
21
|
+
/* Returns 1 if at least min_fraction of the bytes between pos and
|
22
|
+
pos + length in the (data, mask) ring-buffer is UTF8-encoded, otherwise
|
23
|
+
returns 0. */
|
24
|
+
BROTLI_INTERNAL BROTLI_BOOL BrotliIsMostlyUTF8(
|
25
|
+
const uint8_t* data, const size_t pos, const size_t mask,
|
26
|
+
const size_t length, const double min_fraction);
|
27
|
+
|
28
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
29
|
+
} /* extern "C" */
|
30
|
+
#endif
|
31
|
+
|
32
|
+
#endif /* BROTLI_ENC_UTF8_UTIL_H_ */
|
@@ -0,0 +1,87 @@
|
|
1
|
+
/* Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/* Write bits into a byte array. */
|
8
|
+
|
9
|
+
#ifndef BROTLI_ENC_WRITE_BITS_H_
|
10
|
+
#define BROTLI_ENC_WRITE_BITS_H_
|
11
|
+
|
12
|
+
#include "brotli_common_platform.h"
|
13
|
+
#include "brotli_types.h"
|
14
|
+
|
15
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
16
|
+
extern "C" {
|
17
|
+
#endif
|
18
|
+
|
19
|
+
/* This function writes bits into bytes in increasing addresses, and within
|
20
|
+
a byte least-significant-bit first.
|
21
|
+
|
22
|
+
The function can write up to 56 bits in one go with WriteBits
|
23
|
+
Example: let's assume that 3 bits (Rs below) have been written already:
|
24
|
+
|
25
|
+
BYTE-0 BYTE+1 BYTE+2
|
26
|
+
|
27
|
+
0000 0RRR 0000 0000 0000 0000
|
28
|
+
|
29
|
+
Now, we could write 5 or less bits in MSB by just shifting by 3
|
30
|
+
and OR'ing to BYTE-0.
|
31
|
+
|
32
|
+
For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
|
33
|
+
and locate the rest in BYTE+1, BYTE+2, etc. */
|
34
|
+
static BROTLI_INLINE void BrotliWriteBits(size_t n_bits,
|
35
|
+
uint64_t bits,
|
36
|
+
size_t* BROTLI_RESTRICT pos,
|
37
|
+
uint8_t* BROTLI_RESTRICT array) {
|
38
|
+
BROTLI_LOG(("WriteBits %2d 0x%08x%08x %10d\n", (int)n_bits,
|
39
|
+
(uint32_t)(bits >> 32), (uint32_t)(bits & 0xFFFFFFFF),
|
40
|
+
(int)*pos));
|
41
|
+
BROTLI_DCHECK((bits >> n_bits) == 0);
|
42
|
+
BROTLI_DCHECK(n_bits <= 56);
|
43
|
+
#if defined(BROTLI_LITTLE_ENDIAN)
|
44
|
+
/* This branch of the code can write up to 56 bits at a time,
|
45
|
+
7 bits are lost by being perhaps already in *p and at least
|
46
|
+
1 bit is needed to initialize the bit-stream ahead (i.e. if 7
|
47
|
+
bits are in *p and we write 57 bits, then the next write will
|
48
|
+
access a byte that was never initialized). */
|
49
|
+
{
|
50
|
+
uint8_t* p = &array[*pos >> 3];
|
51
|
+
uint64_t v = (uint64_t)(*p); /* Zero-extend 8 to 64 bits. */
|
52
|
+
v |= bits << (*pos & 7);
|
53
|
+
BROTLI_UNALIGNED_STORE64LE(p, v); /* Set some bits. */
|
54
|
+
*pos += n_bits;
|
55
|
+
}
|
56
|
+
#else
|
57
|
+
/* implicit & 0xFF is assumed for uint8_t arithmetics */
|
58
|
+
{
|
59
|
+
uint8_t* array_pos = &array[*pos >> 3];
|
60
|
+
const size_t bits_reserved_in_first_byte = (*pos & 7);
|
61
|
+
size_t bits_left_to_write;
|
62
|
+
bits <<= bits_reserved_in_first_byte;
|
63
|
+
*array_pos++ |= (uint8_t)bits;
|
64
|
+
for (bits_left_to_write = n_bits + bits_reserved_in_first_byte;
|
65
|
+
bits_left_to_write >= 9;
|
66
|
+
bits_left_to_write -= 8) {
|
67
|
+
bits >>= 8;
|
68
|
+
*array_pos++ = (uint8_t)bits;
|
69
|
+
}
|
70
|
+
*array_pos = 0;
|
71
|
+
*pos += n_bits;
|
72
|
+
}
|
73
|
+
#endif
|
74
|
+
}
|
75
|
+
|
76
|
+
/* Zeroes the byte of |array| that contains bit position |pos|.
   |pos| must be byte-aligned (checked with BROTLI_DCHECK). */
static BROTLI_INLINE void BrotliWriteBitsPrepareStorage(
    size_t pos, uint8_t* array) {
  uint8_t* const first_byte = array + (pos >> 3);
  BROTLI_LOG(("WriteBitsPrepareStorage %10d\n", (int)pos));
  BROTLI_DCHECK((pos & 7) == 0);
  *first_byte = 0;
}
|
82
|
+
|
83
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
84
|
+
} /* extern "C" */
|
85
|
+
#endif
|
86
|
+
|
87
|
+
#endif /* BROTLI_ENC_WRITE_BITS_H_ */
|
@@ -0,0 +1,448 @@
|
|
1
|
+
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Distributed under MIT license.
|
4
|
+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
+
*/
|
6
|
+
|
7
|
+
/**
|
8
|
+
* @file
|
9
|
+
* API for Brotli compression.
|
10
|
+
*/
|
11
|
+
|
12
|
+
#ifndef BROTLI_ENC_ENCODE_H_
|
13
|
+
#define BROTLI_ENC_ENCODE_H_
|
14
|
+
|
15
|
+
#include "brotli_port.h"
|
16
|
+
#include "brotli_types.h"
|
17
|
+
|
18
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
19
|
+
extern "C" {
|
20
|
+
#endif
|
21
|
+
|
22
|
+
/** Smallest value accepted for the ::BROTLI_PARAM_LGWIN parameter. */
#define BROTLI_MIN_WINDOW_BITS 10
/**
 * Largest value accepted for the ::BROTLI_PARAM_LGWIN parameter.
 *
 * @note equal to @c BROTLI_MAX_DISTANCE_BITS constant.
 */
#define BROTLI_MAX_WINDOW_BITS 24
/**
 * Largest value accepted for the ::BROTLI_PARAM_LGWIN parameter
 * in "Large Window Brotli" (32-bit).
 */
#define BROTLI_LARGE_MAX_WINDOW_BITS 30
/** Smallest value accepted for the ::BROTLI_PARAM_LGBLOCK parameter. */
#define BROTLI_MIN_INPUT_BLOCK_BITS 16
/** Largest value accepted for the ::BROTLI_PARAM_LGBLOCK parameter. */
#define BROTLI_MAX_INPUT_BLOCK_BITS 24
/** Smallest value accepted for the ::BROTLI_PARAM_QUALITY parameter. */
#define BROTLI_MIN_QUALITY 0
/** Largest value accepted for the ::BROTLI_PARAM_QUALITY parameter. */
#define BROTLI_MAX_QUALITY 11
|
43
|
+
|
44
|
+
/** Values accepted by the ::BROTLI_PARAM_MODE parameter. */
typedef enum BrotliEncoderMode {
  /**
   * Default compression mode.
   *
   * The compressor makes no advance assumptions about the properties of the
   * input.
   */
  BROTLI_MODE_GENERIC = 0,
  /** Compression mode for UTF-8 formatted text input. */
  BROTLI_MODE_TEXT = 1,
  /** Compression mode used in WOFF 2.0. */
  BROTLI_MODE_FONT = 2
} BrotliEncoderMode;
|
58
|
+
|
59
|
+
/** Value of ::BROTLI_PARAM_QUALITY used by default. */
#define BROTLI_DEFAULT_QUALITY 11
/** Value of ::BROTLI_PARAM_LGWIN used by default. */
#define BROTLI_DEFAULT_WINDOW 22
/** Value of ::BROTLI_PARAM_MODE used by default. */
#define BROTLI_DEFAULT_MODE BROTLI_MODE_GENERIC
|
65
|
+
|
66
|
+
/** Operations the streaming encoder can be asked to perform. */
typedef enum BrotliEncoderOperation {
  /**
   * Process input.
   *
   * The encoder may postpone producing output until it has processed enough
   * input.
   */
  BROTLI_OPERATION_PROCESS = 0,
  /**
   * Produce output for all processed input.
   *
   * The actual flush happens once the input stream is depleted and the output
   * stream has enough space. The client should therefore repeat
   * ::BROTLI_OPERATION_FLUSH until @p available_in becomes @c 0 and
   * ::BrotliEncoderHasMoreOutput returns ::BROTLI_FALSE. If output is acquired
   * via ::BrotliEncoderTakeOutput, the operation should be repeated after the
   * output buffer is drained.
   *
   * @warning Until flush is complete, client @b SHOULD @b NOT swap,
   *          reduce or extend input stream.
   *
   * Once the flush is complete, the output data is sufficient for a decoder
   * to reproduce all the given input.
   */
  BROTLI_OPERATION_FLUSH = 1,
  /**
   * Finalize the stream.
   *
   * The actual finalization happens once the input stream is depleted and the
   * output stream has enough space. The client should therefore repeat
   * ::BROTLI_OPERATION_FINISH until @p available_in becomes @c 0 and
   * ::BrotliEncoderHasMoreOutput returns ::BROTLI_FALSE. If output is acquired
   * via ::BrotliEncoderTakeOutput, the operation should be repeated after the
   * output buffer is drained.
   *
   * @warning Until finalization is complete, client @b SHOULD @b NOT swap,
   *          reduce or extend input stream.
   *
   * Helper function ::BrotliEncoderIsFinished checks if stream is finalized
   * and output fully dumped.
   *
   * Adding more input data to finalized stream is impossible.
   */
  BROTLI_OPERATION_FINISH = 2,
  /**
   * Emit metadata block to stream.
   *
   * Metadata is opaque to Brotli: neither encoder, nor decoder processes this
   * data or relies on it. It may be used to pass some extra information from
   * encoder client to decoder client without interfering with main data
   * stream.
   *
   * @note Encoder may emit empty metadata blocks internally, to pad encoded
   *       stream to byte boundary.
   *
   * @warning Until emitting metadata is complete client @b SHOULD @b NOT swap,
   *          reduce or extend input stream.
   *
   * @warning The whole content of input buffer is considered to be the content
   *          of metadata block. Do @b NOT @e append metadata to input stream,
   *          before it is depleted with other operations.
   *
   * Stream is soft-flushed before metadata block is emitted. Metadata block
   * @b MUST be no longer than 16MiB.
   */
  BROTLI_OPERATION_EMIT_METADATA = 3
} BrotliEncoderOperation;
|
132
|
+
|
133
|
+
/** Parameter identifiers accepted by ::BrotliEncoderSetParameter. */
typedef enum BrotliEncoderParameter {
  /**
   * Tune encoder for specific input.
   *
   * ::BrotliEncoderMode enumerates all available values.
   */
  BROTLI_PARAM_MODE = 0,
  /**
   * The main compression speed-density lever.
   *
   * Higher quality means slower compression. Range is
   * from ::BROTLI_MIN_QUALITY to ::BROTLI_MAX_QUALITY.
   */
  BROTLI_PARAM_QUALITY = 1,
  /**
   * Recommended sliding LZ77 window size.
   *
   * Encoder may reduce this value, e.g. if input is much smaller than
   * window size.
   *
   * Window size is `(1 << value) - 16`.
   *
   * Range is from ::BROTLI_MIN_WINDOW_BITS to ::BROTLI_MAX_WINDOW_BITS.
   */
  BROTLI_PARAM_LGWIN = 2,
  /**
   * Recommended input block size.
   *
   * Encoder may reduce this value, e.g. if input is much smaller than input
   * block size.
   *
   * Range is from ::BROTLI_MIN_INPUT_BLOCK_BITS to
   * ::BROTLI_MAX_INPUT_BLOCK_BITS.
   *
   * @note Bigger input block size allows better compression, but consumes more
   *       memory. \n The rough formula of memory used for temporary input
   *       storage is `3 << lgBlock`.
   */
  BROTLI_PARAM_LGBLOCK = 3,
  /**
   * Flag that affects usage of "literal context modeling" format feature.
   *
   * This flag is a "decoding-speed vs compression ratio" trade-off.
   */
  BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING = 4,
  /**
   * Estimated total input size for all ::BrotliEncoderCompressStream calls.
   *
   * The default value is 0, which means that the total input size is unknown.
   */
  BROTLI_PARAM_SIZE_HINT = 5,
  /**
   * Flag that determines if "Large Window Brotli" is used.
   */
  BROTLI_PARAM_LARGE_WINDOW = 6,
  /**
   * Recommended number of postfix bits (NPOSTFIX).
   *
   * Encoder may change this value.
   *
   * Range is from 0 to ::BROTLI_MAX_NPOSTFIX.
   */
  BROTLI_PARAM_NPOSTFIX = 7,
  /**
   * Recommended number of direct distance codes (NDIRECT).
   *
   * Encoder may change this value.
   *
   * Range is from 0 to (15 << NPOSTFIX) in steps of (1 << NPOSTFIX).
   */
  BROTLI_PARAM_NDIRECT = 8,
  /**
   * Number of bytes of input stream already processed by a different instance.
   *
   * @note It is important to configure all the encoder instances with same
   *       parameters (except this one) in order to allow all the encoded parts
   *       obey the same restrictions implied by header.
   *
   * If offset is not 0, then stream header is omitted.
   * In any case output start is byte aligned, so for proper streams stitching
   * "predecessor" stream must be flushed.
   *
   * Range is not artificially limited, but all the values greater or equal to
   * maximal window size have the same effect. Values greater than 2**30 are
   * not allowed.
   */
  BROTLI_PARAM_STREAM_OFFSET = 9
} BrotliEncoderParameter;
|
222
|
+
|
223
|
+
/**
|
224
|
+
* Opaque structure that holds encoder state.
|
225
|
+
*
|
226
|
+
* Allocated and initialized with ::BrotliEncoderCreateInstance.
|
227
|
+
* Cleaned up and deallocated with ::BrotliEncoderDestroyInstance.
|
228
|
+
*/
|
229
|
+
typedef struct BrotliEncoderStateStruct BrotliEncoderState;
|
230
|
+
|
231
|
+
/**
|
232
|
+
* Sets the specified parameter to the given encoder instance.
|
233
|
+
*
|
234
|
+
* @param state encoder instance
|
235
|
+
* @param param parameter to set
|
236
|
+
* @param value new parameter value
|
237
|
+
* @returns ::BROTLI_FALSE if parameter is unrecognized, or value is invalid
|
238
|
+
* @returns ::BROTLI_FALSE if value of parameter can not be changed at current
|
239
|
+
* encoder state (e.g. when encoding is started, window size might be
|
240
|
+
* already encoded and therefore it is impossible to change it)
|
241
|
+
* @returns ::BROTLI_TRUE if value is accepted
|
242
|
+
* @warning invalid values might be accepted in case they would not break
|
243
|
+
* encoding process.
|
244
|
+
*/
|
245
|
+
BROTLI_ENC_API BROTLI_BOOL BrotliEncoderSetParameter(
|
246
|
+
BrotliEncoderState* state, BrotliEncoderParameter param, uint32_t value);
|
247
|
+
|
248
|
+
/**
|
249
|
+
* Creates an instance of ::BrotliEncoderState and initializes it.
|
250
|
+
*
|
251
|
+
* @p alloc_func and @p free_func @b MUST be both zero or both non-zero. In the
|
252
|
+
* case they are both zero, default memory allocators are used. @p opaque is
|
253
|
+
* passed to @p alloc_func and @p free_func when they are called. @p free_func
|
254
|
+
* has to return without doing anything when asked to free a NULL pointer.
|
255
|
+
*
|
256
|
+
* @param alloc_func custom memory allocation function
|
257
|
+
* @param free_func custom memory free function
|
258
|
+
* @param opaque custom memory manager handle
|
259
|
+
* @returns @c 0 if instance can not be allocated or initialized
|
260
|
+
* @returns pointer to initialized ::BrotliEncoderState otherwise
|
261
|
+
*/
|
262
|
+
BROTLI_ENC_API BrotliEncoderState* BrotliEncoderCreateInstance(
|
263
|
+
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
|
264
|
+
|
265
|
+
/**
|
266
|
+
* Deinitializes and frees ::BrotliEncoderState instance.
|
267
|
+
*
|
268
|
+
* @param state encoder instance to be cleaned up and deallocated
|
269
|
+
*/
|
270
|
+
BROTLI_ENC_API void BrotliEncoderDestroyInstance(BrotliEncoderState* state);
|
271
|
+
|
272
|
+
/**
|
273
|
+
* Calculates the output size bound for the given @p input_size.
|
274
|
+
*
|
275
|
+
* @warning Result is only valid if quality is at least @c 2 and, in
|
276
|
+
* case ::BrotliEncoderCompressStream was used, no flushes
|
277
|
+
* (::BROTLI_OPERATION_FLUSH) were performed.
|
278
|
+
*
|
279
|
+
* @param input_size size of projected input
|
280
|
+
* @returns @c 0 if result does not fit @c size_t
|
281
|
+
*/
|
282
|
+
BROTLI_ENC_API size_t BrotliEncoderMaxCompressedSize(size_t input_size);
|
283
|
+
|
284
|
+
/**
|
285
|
+
* Performs one-shot memory-to-memory compression.
|
286
|
+
*
|
287
|
+
* Compresses the data in @p input_buffer into @p encoded_buffer, and sets
|
288
|
+
* @p *encoded_size to the compressed length.
|
289
|
+
*
|
290
|
+
* @note If ::BrotliEncoderMaxCompressedSize(@p input_size) returns non-zero
|
291
|
+
* value, then output is guaranteed to be no longer than that.
|
292
|
+
*
|
293
|
+
* @note If @p lgwin is greater than ::BROTLI_MAX_WINDOW_BITS then resulting
|
294
|
+
* stream might be incompatible with RFC 7932; to decode such streams,
|
295
|
+
* decoder should be configured with
|
296
|
+
* ::BROTLI_DECODER_PARAM_LARGE_WINDOW = @c 1
|
297
|
+
*
|
298
|
+
* @param quality quality parameter value, e.g. ::BROTLI_DEFAULT_QUALITY
|
299
|
+
* @param lgwin lgwin parameter value, e.g. ::BROTLI_DEFAULT_WINDOW
|
300
|
+
* @param mode mode parameter value, e.g. ::BROTLI_DEFAULT_MODE
|
301
|
+
* @param input_size size of @p input_buffer
|
302
|
+
* @param input_buffer input data buffer with at least @p input_size
|
303
|
+
* addressable bytes
|
304
|
+
* @param[in, out] encoded_size @b in: size of @p encoded_buffer; \n
|
305
|
+
* @b out: length of compressed data written to
|
306
|
+
* @p encoded_buffer, or @c 0 if compression fails
|
307
|
+
* @param encoded_buffer compressed data destination buffer
|
308
|
+
* @returns ::BROTLI_FALSE in case of compression error
|
309
|
+
* @returns ::BROTLI_FALSE if output buffer is too small
|
310
|
+
* @returns ::BROTLI_TRUE otherwise
|
311
|
+
*/
|
312
|
+
BROTLI_ENC_API BROTLI_BOOL BrotliEncoderCompress(
|
313
|
+
int quality, int lgwin, BrotliEncoderMode mode, size_t input_size,
|
314
|
+
const uint8_t input_buffer[BROTLI_ARRAY_PARAM(input_size)],
|
315
|
+
size_t* encoded_size,
|
316
|
+
uint8_t encoded_buffer[BROTLI_ARRAY_PARAM(*encoded_size)]);
|
317
|
+
|
318
|
+
/**
|
319
|
+
* Compresses input stream to output stream.
|
320
|
+
*
|
321
|
+
* The values @p *available_in and @p *available_out must specify the number of
|
322
|
+
* bytes addressable at @p *next_in and @p *next_out respectively.
|
323
|
+
* When @p *available_out is @c 0, @p next_out is allowed to be @c NULL.
|
324
|
+
*
|
325
|
+
* After each call, @p *available_in will be decremented by the amount of input
|
326
|
+
* bytes consumed, and the @p *next_in pointer will be incremented by that
|
327
|
+
* amount. Similarly, @p *available_out will be decremented by the amount of
|
328
|
+
* output bytes written, and the @p *next_out pointer will be incremented by
|
329
|
+
* that amount.
|
330
|
+
*
|
331
|
+
* @p total_out, if it is not a null-pointer, will be set to the number
|
332
|
+
* of compressed bytes produced since the last @p state initialization.
|
333
|
+
*
|
334
|
+
*
|
335
|
+
*
|
336
|
+
* Internally, the workflow consists of 3 tasks:
|
337
|
+
* -# (optionally) copy input data to internal buffer
|
338
|
+
* -# actually compress data and (optionally) store it to internal buffer
|
339
|
+
* -# (optionally) copy compressed bytes from internal buffer to output stream
|
340
|
+
*
|
341
|
+
* Whenever all 3 tasks can't move forward anymore, or error occurs, this
|
342
|
+
* method returns the control flow to caller.
|
343
|
+
*
|
344
|
+
* @p op is used to perform flush, finish the stream, or inject metadata block.
|
345
|
+
* See ::BrotliEncoderOperation for more information.
|
346
|
+
*
|
347
|
+
* Flushing the stream means forcing encoding of all input passed to encoder and
|
348
|
+
* completing the current output block, so it could be fully decoded by stream
|
349
|
+
* decoder. To perform flush set @p op to ::BROTLI_OPERATION_FLUSH.
|
350
|
+
* Under some circumstances (e.g. lack of output stream capacity) this operation
|
351
|
+
* would require several calls to ::BrotliEncoderCompressStream. The method must
|
352
|
+
* be called again until both input stream is depleted and encoder has no more
|
353
|
+
* output (see ::BrotliEncoderHasMoreOutput) after the method is called.
|
354
|
+
*
|
355
|
+
* Finishing the stream means encoding of all input passed to encoder and
|
356
|
+
* adding specific "final" marks, so stream decoder could determine that stream
|
357
|
+
* is complete. To perform finish set @p op to ::BROTLI_OPERATION_FINISH.
|
358
|
+
* Under some circumstances (e.g. lack of output stream capacity) this operation
|
359
|
+
* would require several calls to ::BrotliEncoderCompressStream. The method must
|
360
|
+
* be called again until both input stream is depleted and encoder has no more
|
361
|
+
* output (see ::BrotliEncoderHasMoreOutput) after the method is called.
|
362
|
+
*
|
363
|
+
* @warning When flushing and finishing, @p op should not change until operation
|
364
|
+
* is complete; input stream should not be swapped, reduced or
|
365
|
+
* extended as well.
|
366
|
+
*
|
367
|
+
* @param state encoder instance
|
368
|
+
* @param op requested operation
|
369
|
+
* @param[in, out] available_in @b in: amount of available input; \n
|
370
|
+
* @b out: amount of unused input
|
371
|
+
* @param[in, out] next_in pointer to the next input byte
|
372
|
+
* @param[in, out] available_out @b in: length of output buffer; \n
|
373
|
+
* @b out: remaining size of output buffer
|
374
|
+
* @param[in, out] next_out compressed output buffer cursor;
|
375
|
+
* can be @c NULL if @p available_out is @c 0
|
376
|
+
* @param[out] total_out number of bytes produced so far; can be @c NULL
|
377
|
+
* @returns ::BROTLI_FALSE if there was an error
|
378
|
+
* @returns ::BROTLI_TRUE otherwise
|
379
|
+
*/
|
380
|
+
BROTLI_ENC_API BROTLI_BOOL BrotliEncoderCompressStream(
|
381
|
+
BrotliEncoderState* state, BrotliEncoderOperation op, size_t* available_in,
|
382
|
+
const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
|
383
|
+
size_t* total_out);
|
384
|
+
|
385
|
+
/**
|
386
|
+
* Checks if encoder instance reached the final state.
|
387
|
+
*
|
388
|
+
* @param state encoder instance
|
389
|
+
* @returns ::BROTLI_TRUE if encoder is in a state where it reached the end of
|
390
|
+
* the input and produced all of the output
|
391
|
+
* @returns ::BROTLI_FALSE otherwise
|
392
|
+
*/
|
393
|
+
BROTLI_ENC_API BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* state);
|
394
|
+
|
395
|
+
/**
|
396
|
+
* Checks if encoder has more output.
|
397
|
+
*
|
398
|
+
* @param state encoder instance
|
399
|
+
* @returns ::BROTLI_TRUE, if encoder has some unconsumed output
|
400
|
+
* @returns ::BROTLI_FALSE otherwise
|
401
|
+
*/
|
402
|
+
BROTLI_ENC_API BROTLI_BOOL BrotliEncoderHasMoreOutput(
|
403
|
+
BrotliEncoderState* state);
|
404
|
+
|
405
|
+
/**
|
406
|
+
* Acquires pointer to internal output buffer.
|
407
|
+
*
|
408
|
+
* This method is used to make language bindings easier and more efficient:
|
409
|
+
* -# push data to ::BrotliEncoderCompressStream,
|
410
|
+
* until ::BrotliEncoderHasMoreOutput returns ::BROTLI_TRUE
|
411
|
+
* -# use ::BrotliEncoderTakeOutput to peek bytes and copy to language-specific
|
412
|
+
* entity
|
413
|
+
*
|
414
|
+
* Also this could be useful if there is an output stream that is able to
|
415
|
+
* consume all the provided data (e.g. when data is saved to file system).
|
416
|
+
*
|
417
|
+
* @attention After every call to ::BrotliEncoderTakeOutput @p *size bytes of
|
418
|
+
* output are considered consumed for all consecutive calls to the
|
419
|
+
* instance methods; returned pointer becomes invalidated as well.
|
420
|
+
*
|
421
|
+
* @note Encoder output is not guaranteed to be contiguous. This means that
|
422
|
+
* after the size-unrestricted call to ::BrotliEncoderTakeOutput,
|
423
|
+
* the immediately following call to ::BrotliEncoderTakeOutput may return more data.
|
424
|
+
*
|
425
|
+
* @param state encoder instance
|
426
|
+
* @param[in, out] size @b in: number of bytes caller is ready to take, @c 0 if
|
427
|
+
* any amount could be handled; \n
|
428
|
+
* @b out: amount of data pointed to by the returned pointer and
|
429
|
+
* considered consumed; \n
|
430
|
+
* out value is never greater than in value, unless it is @c 0
|
431
|
+
* @returns pointer to output data
|
432
|
+
*/
|
433
|
+
BROTLI_ENC_API const uint8_t* BrotliEncoderTakeOutput(
|
434
|
+
BrotliEncoderState* state, size_t* size);
|
435
|
+
|
436
|
+
|
437
|
+
/**
|
438
|
+
* Gets an encoder library version.
|
439
|
+
*
|
440
|
+
* Look at BROTLI_VERSION for more information.
|
441
|
+
*/
|
442
|
+
BROTLI_ENC_API uint32_t BrotliEncoderVersion(void);
|
443
|
+
|
444
|
+
#if defined(__cplusplus) || defined(c_plusplus)
|
445
|
+
} /* extern "C" */
|
446
|
+
#endif
|
447
|
+
|
448
|
+
#endif /* BROTLI_ENC_ENCODE_H_ */
|