isomorfeus-ferret 0.12.6 → 0.13.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +101 -19
- data/README.md +85 -16
- data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
- data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
- data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
- data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
- data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
- data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
- data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
- data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
- data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
- data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
- data/ext/isomorfeus_ferret_ext/bzlib_blocksort.c +1094 -0
- data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
- data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
- data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
- data/ext/isomorfeus_ferret_ext/bzlib_huffman.c +205 -0
- data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
- data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
- data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
- data/ext/isomorfeus_ferret_ext/frb_index.c +513 -464
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
- data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
- data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
- data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
- data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
- data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
- data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
- data/ext/isomorfeus_ferret_ext/frt_document.h +10 -9
- data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
- data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +91 -200
- data/ext/isomorfeus_ferret_ext/frt_global.h +7 -18
- data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
- data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_index.c +714 -384
- data/ext/isomorfeus_ferret_ext/frt_index.h +274 -290
- data/ext/isomorfeus_ferret_ext/frt_lang.c +0 -2
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +68 -91
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +46 -84
- data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
- data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
- data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
- data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
- data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +22 -112
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
- data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
- data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
- data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
- data/ext/isomorfeus_ferret_ext/test.c +0 -17
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
- data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
- data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
- data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_fields.c +111 -100
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
- data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
- data/ext/isomorfeus_ferret_ext/test_global.c +0 -46
- data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
- data/ext/isomorfeus_ferret_ext/test_index.c +373 -363
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
- data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -64
- data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
- data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
- data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
- data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +113 -58
- data/ext/isomorfeus_ferret_ext/email.rl +0 -21
- data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
- data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
- data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
- data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
- data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
- data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
- data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
- data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
- data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
- data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -6,6 +6,13 @@
|
|
6
6
|
#include <string.h>
|
7
7
|
#include <limits.h>
|
8
8
|
#include <ctype.h>
|
9
|
+
#include "brotli_decode.h"
|
10
|
+
#include "brotli_encode.h"
|
11
|
+
#include "bzlib.h"
|
12
|
+
#include "lz4frame.h"
|
13
|
+
|
14
|
+
#undef close
|
15
|
+
#undef read
|
9
16
|
|
10
17
|
extern void frt_micro_sleep(const int micro_seconds);
|
11
18
|
|
@@ -39,8 +46,9 @@ static char *ste_next(FrtTermEnum *te);
|
|
39
46
|
#define FORMAT 0
|
40
47
|
#define SEGMENTS_GEN_FILE_NAME "segments"
|
41
48
|
#define MAX_EXT_LEN 10
|
42
|
-
#define
|
43
|
-
#define
|
49
|
+
#define FRT_COMPRESSION_BUFFER_SIZE 16348 /* NOTE(review): possibly a transposition of 16384 (16 KiB) - confirm intent before changing */
|
50
|
+
#define FRT_BROTLI_COMPRESSION_LEVEL 4
|
51
|
+
#define FRT_BZIP_COMPRESSION_LEVEL 9
|
44
52
|
|
45
53
|
/* *** Must be three characters *** */
|
46
54
|
static const char *INDEX_EXTENSIONS[] = {
|
@@ -101,29 +109,22 @@ static frt_u64 str36_to_u64(char *p)
|
|
101
109
|
* @param ext extension of the filename (including .)
|
102
110
|
* @param gen generation
|
103
111
|
*/
|
104
|
-
char *frt_fn_for_generation(char *buf,
|
105
|
-
const char *base,
|
106
|
-
const char *ext,
|
107
|
-
frt_i64 gen)
|
108
|
-
{
|
112
|
+
char *frt_fn_for_generation(char *buf, const char *base, const char *ext, frt_i64 gen) {
    char digits[FRT_SEGMENT_NAME_MAX_LENGTH];
    char *gen_str;

    /* A generation of -1 produces no file name at all. */
    if (gen == -1) {
        return NULL;
    }

    /* The generation is rendered by u64_to_str36() into the local scratch buffer. */
    gen_str = u64_to_str36(digits, FRT_SEGMENT_NAME_MAX_LENGTH, (frt_u64)gen);

    /* "<base>_<gen>" with an optional ".<ext>" suffix; buf must be large
     * enough for the result, no bound is checked here. */
    if (ext != NULL) {
        sprintf(buf, "%s_%s.%s", base, gen_str, ext);
    } else {
        sprintf(buf, "%s_%s", base, gen_str);
    }
    return buf;
}
|
124
126
|
|
125
|
-
static char *segfn_for_generation(char *buf, frt_u64 generation)
|
126
|
-
{
|
127
|
+
static char *segfn_for_generation(char *buf, frt_u64 generation) {
|
127
128
|
char b[FRT_SEGMENT_NAME_MAX_LENGTH];
|
128
129
|
char *u = u64_to_str36(b, FRT_SEGMENT_NAME_MAX_LENGTH, generation);
|
129
130
|
sprintf(buf, FRT_SEGMENTS_FILE_NAME"_%s", u);
|
@@ -201,8 +202,7 @@ FrtCacheObject *frt_co_create(FrtHash *ref_tab1, FrtHash *ref_tab2,
|
|
201
202
|
return self;
|
202
203
|
}
|
203
204
|
|
204
|
-
FrtHash *frt_co_hash_create()
|
205
|
-
{
|
205
|
+
FrtHash *frt_co_hash_create(void) {
|
206
206
|
return frt_h_new(&co_hash, &co_eq, (frt_free_ft)NULL, (frt_free_ft)&co_destroy);
|
207
207
|
}
|
208
208
|
|
@@ -212,8 +212,7 @@ FrtHash *frt_co_hash_create()
|
|
212
212
|
*
|
213
213
|
****************************************************************************/
|
214
214
|
|
215
|
-
static void fi_set_store(FrtFieldInfo *fi,
|
216
|
-
{
|
215
|
+
static void fi_set_store(FrtFieldInfo *fi, FrtStoreValue store) {
|
217
216
|
switch (store) {
|
218
217
|
case FRT_STORE_NO:
|
219
218
|
break;
|
@@ -223,8 +222,23 @@ static void fi_set_store(FrtFieldInfo *fi, int store)
|
|
223
222
|
}
|
224
223
|
}
|
225
224
|
|
226
|
-
static void
|
227
|
-
{
|
225
|
+
/**
 * Record the chosen compression codec in fi->bits.
 *
 * For BROTLI, BZ2 and LZ4 the general "is compressed" bit is OR-ed in
 * together with the codec-specific bit. FRT_COMPRESSION_NONE, and any value
 * not listed here, leaves fi->bits untouched.
 */
static void fi_set_compression(FrtFieldInfo *fi, FrtCompressionType compression) {
    if (compression == FRT_COMPRESSION_BROTLI) {
        fi->bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_BROTLI_BM;
    } else if (compression == FRT_COMPRESSION_BZ2) {
        fi->bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_BZ2_BM;
    } else if (compression == FRT_COMPRESSION_LZ4) {
        fi->bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_LZ4_BM;
    }
}
|
240
|
+
|
241
|
+
static void fi_set_index(FrtFieldInfo *fi, FrtIndexValue index) {
|
228
242
|
switch (index) {
|
229
243
|
case FRT_INDEX_NO:
|
230
244
|
break;
|
@@ -244,8 +258,7 @@ static void fi_set_index(FrtFieldInfo *fi, int index)
|
|
244
258
|
}
|
245
259
|
}
|
246
260
|
|
247
|
-
static void fi_set_term_vector(FrtFieldInfo *fi,
|
248
|
-
{
|
261
|
+
static void fi_set_term_vector(FrtFieldInfo *fi, FrtTermVectorValue term_vector) {
|
249
262
|
switch (term_vector) {
|
250
263
|
case FRT_TERM_VECTOR_NO:
|
251
264
|
break;
|
@@ -265,33 +278,40 @@ static void fi_set_term_vector(FrtFieldInfo *fi, int term_vector)
|
|
265
278
|
}
|
266
279
|
}
|
267
280
|
|
268
|
-
static void fi_check_params(
|
269
|
-
{
|
281
|
+
/**
 * Reject inconsistent field option combinations.
 *
 * Raises FRT_ARG_ERROR through FRT_RAISE when
 *  - term vectors are requested for a field that is not indexed, or
 *  - compression is requested for a field that is not stored.
 */
static void fi_check_params(FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector) {
    const int tv_without_index = (term_vector != FRT_TERM_VECTOR_NO) && (index == FRT_INDEX_NO);
    const int compression_without_store = (compression != FRT_COMPRESSION_NONE) && (store == FRT_STORE_NO);

    if (tv_without_index) {
        FRT_RAISE(FRT_ARG_ERROR, "You can't store the term vectors of an unindexed field.");
    }
    if (compression_without_store) {
        FRT_RAISE(FRT_ARG_ERROR, "Field must be stored for compression to be useful.");
    }
}
|
276
290
|
|
277
|
-
FrtFieldInfo *
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
{
|
282
|
-
FrtFieldInfo *fi = FRT_ALLOC(FrtFieldInfo);
|
291
|
+
FrtFieldInfo *frt_fi_alloc(void) {
|
292
|
+
return FRT_ALLOC(FrtFieldInfo);
|
293
|
+
}
|
294
|
+
|
295
|
+
/**
 * Initialise an already allocated FrtFieldInfo.
 *
 * The option combination is validated with fi_check_params() first. After
 * that the field gets its name, a boost of 1.0, a reference count of 1,
 * rfi set to Qnil, and its bits built from the four option setters.
 *
 * @return fi, for call chaining
 */
FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector) {
    assert(NULL != name);
    fi_check_params(store, compression, index, term_vector);

    /* identity and bookkeeping */
    fi->name = name;
    fi->boost = 1.0f;
    fi->ref_cnt = 1;
    fi->rfi = Qnil;

    /* option flags: start from a clean slate, then apply each setter */
    fi->bits = 0;
    fi_set_store(fi, store);
    fi_set_compression(fi, compression);
    fi_set_index(fi, index);
    fi_set_term_vector(fi, term_vector);

    return fi;
}
|
294
309
|
|
310
|
+
/**
 * Allocate a FrtFieldInfo with frt_fi_alloc() and initialise it with
 * frt_fi_init() using the given name and options.
 */
FrtFieldInfo *frt_fi_new(ID name, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector) {
    return frt_fi_init(frt_fi_alloc(), name, store, compression, index, term_vector);
}
|
314
|
+
|
295
315
|
void frt_fi_deref(FrtFieldInfo *fi)
|
296
316
|
{
|
297
317
|
if (0 == --(fi->ref_cnt)) {
|
@@ -299,13 +319,30 @@ void frt_fi_deref(FrtFieldInfo *fi)
|
|
299
319
|
}
|
300
320
|
}
|
301
321
|
|
322
|
+
/**
 * Report which compression codec a field uses.
 *
 * Returns FRT_COMPRESSION_NONE when the field is not flagged as compressed.
 * Otherwise the codec flags are tested in the order brotli, bz2, lz4; a
 * field flagged as compressed with none of those set is reported as brotli.
 */
FrtCompressionType frt_fi_get_compression(FrtFieldInfo *fi) {
    if (!(fi_is_compressed(fi))) {
        return FRT_COMPRESSION_NONE;
    }
    if (fi_is_compressed_brotli(fi)) {
        return FRT_COMPRESSION_BROTLI;
    }
    if (fi_is_compressed_bz2(fi)) {
        return FRT_COMPRESSION_BZ2;
    }
    if (fi_is_compressed_lz4(fi)) {
        return FRT_COMPRESSION_LZ4;
    }
    /* compressed flag set, but no codec flag: fall back to brotli */
    return FRT_COMPRESSION_BROTLI;
}
|
337
|
+
|
302
338
|
char *frt_fi_to_s(FrtFieldInfo *fi)
|
303
339
|
{
|
304
340
|
const char *fi_name = rb_id2name(fi->name);
|
305
341
|
char *str = FRT_ALLOC_N(char, strlen(fi_name) + 200);
|
306
342
|
char *s = str;
|
307
|
-
s += sprintf(str, "[\"%s\":(%s%s%s%s%s%s%s", fi_name,
|
343
|
+
s += sprintf(str, "[\"%s\":(%s%s%s%s%s%s%s%s", fi_name,
|
308
344
|
fi_is_stored(fi) ? "is_stored, " : "",
|
345
|
+
fi_is_compressed(fi) ? "is_compressed, " : "",
|
309
346
|
fi_is_indexed(fi) ? "is_indexed, " : "",
|
310
347
|
fi_is_tokenized(fi) ? "is_tokenized, " : "",
|
311
348
|
fi_omit_norms(fi) ? "omit_norms, " : "",
|
@@ -327,24 +364,31 @@ char *frt_fi_to_s(FrtFieldInfo *fi)
|
|
327
364
|
*
|
328
365
|
****************************************************************************/
|
329
366
|
|
330
|
-
FrtFieldInfos *
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
367
|
+
FrtFieldInfos *frt_fis_alloc(void) {
|
368
|
+
return FRT_ALLOC(FrtFieldInfos);
|
369
|
+
}
|
370
|
+
|
371
|
+
/**
 * Initialise an already allocated FrtFieldInfos collection.
 *
 * The option combination is validated with fi_check_params(). The
 * collection starts empty with room for FIELD_INFOS_INIT_CAPA fields, a
 * reference count of 1 and rfis set to Qnil. The given options are stored
 * as the defaults the collection keeps for its fields.
 *
 * @return fis, for call chaining
 */
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector) {
    fi_check_params(store, compression, index, term_vector);

    /* empty name lookup table plus the growable array of fields */
    fis->field_dict = frt_h_new_ptr((frt_free_ft)&frt_fi_deref);
    fis->size = 0;
    fis->capa = FIELD_INFOS_INIT_CAPA;
    fis->fields = FRT_ALLOC_N(FrtFieldInfo *, fis->capa);

    /* default options kept on the collection */
    fis->store = store;
    fis->compression = compression;
    fis->index = index;
    fis->term_vector = term_vector;

    fis->ref_cnt = 1;
    fis->rfis = Qnil;
    return fis;
}
|
345
385
|
|
346
|
-
|
347
|
-
|
386
|
+
/**
 * Allocate a FrtFieldInfos with frt_fis_alloc() and initialise it with
 * frt_fis_init() using the given default options.
 */
FrtFieldInfos *frt_fis_new(FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector) {
    return frt_fis_init(frt_fis_alloc(), store, compression, index, term_vector);
}
|
390
|
+
|
391
|
+
FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi) {
|
348
392
|
if (fis->size == fis->capa) {
|
349
393
|
fis->capa <<= 1;
|
350
394
|
FRT_REALLOC_N(fis->fields, FrtFieldInfo *, fis->capa);
|
@@ -358,23 +402,20 @@ FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi)
|
|
358
402
|
return fi;
|
359
403
|
}
|
360
404
|
|
361
|
-
FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis,
|
362
|
-
{
|
405
|
+
/**
 * Look a field up by name in fis->field_dict.
 *
 * @return whatever frt_h_get() yields for that key, as a FrtFieldInfo
 *         pointer (presumably NULL when the name is unknown - confirm
 *         against frt_h_get())
 */
FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name) {
    void *entry = frt_h_get(fis->field_dict, (void *)name);
    return (FrtFieldInfo *)entry;
}
|
365
408
|
|
366
|
-
int frt_fis_get_field_num(FrtFieldInfos *fis,
|
367
|
-
{
|
409
|
+
/**
 * Return the number of the field registered under name,
 * or -1 when the lookup in fis->field_dict yields nothing.
 */
int frt_fis_get_field_num(FrtFieldInfos *fis, ID name) {
    FrtFieldInfo *found = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
    if (!found) {
        return -1;
    }
    return found->number;
}
|
372
414
|
|
373
|
-
FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis,
|
374
|
-
{
|
415
|
+
FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name) {
|
375
416
|
FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
|
376
417
|
if (!fi) {
|
377
|
-
fi = (FrtFieldInfo*)frt_fi_new(name, fis->store, fis->index, fis->term_vector);
|
418
|
+
fi = (FrtFieldInfo*)frt_fi_new(name, fis->store, fis->compression, fis->index, fis->term_vector);
|
378
419
|
frt_fis_add_field(fis, fi);
|
379
420
|
}
|
380
421
|
return fi;
|
@@ -386,16 +427,14 @@ FrtFieldInfos *frt_fis_read(FrtInStream *is)
|
|
386
427
|
char *field_name;
|
387
428
|
FRT_TRY
|
388
429
|
do {
|
389
|
-
FrtStoreValue store_val;
|
390
|
-
FrtIndexValue index_val;
|
391
430
|
FrtTermVectorValue term_vector_val;
|
392
431
|
volatile int i;
|
393
432
|
union { frt_u32 i; float f; } tmp;
|
394
433
|
FrtFieldInfo *volatile fi;
|
395
|
-
store_val = (FrtStoreValue)frt_is_read_vint(is);
|
396
|
-
index_val = (FrtIndexValue)frt_is_read_vint(is);
|
434
|
+
FrtStoreValue store_val = (FrtStoreValue)frt_is_read_vint(is);
|
435
|
+
FrtIndexValue index_val = (FrtIndexValue)frt_is_read_vint(is);
|
397
436
|
term_vector_val = (FrtTermVectorValue)frt_is_read_vint(is);
|
398
|
-
fis = frt_fis_new(store_val, index_val, term_vector_val);
|
437
|
+
fis = frt_fis_new(store_val, FRT_COMPRESSION_NONE, index_val, term_vector_val); // TODO compression, read from store?
|
399
438
|
for (i = frt_is_read_vint(is); i > 0; i--) {
|
400
439
|
fi = FRT_ALLOC_AND_ZERO(FrtFieldInfo);
|
401
440
|
FRT_TRY
|
@@ -443,7 +482,8 @@ void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os)
|
|
443
482
|
/* Printable names for a field's store setting.
 * NOTE(review): presumably indexed by the store value inside fi_store_str();
 * the index-to-value mapping is not visible here - confirm. */
static const char *store_str[] = { ":no", ":yes", "", ":compressed" };
|
448
488
|
|
449
489
|
static const char *fi_store_str(FrtFieldInfo *fi)
|
@@ -796,8 +836,7 @@ static char *sis_next_seg_file_name(char *buf, FrtStore *store)
|
|
796
836
|
|
797
837
|
#define GEN_FILE_RETRY_COUNT 10
|
798
838
|
#define GEN_LOOK_AHEAD_COUNT 10
|
799
|
-
static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf,
|
800
|
-
void (*run)(FrtStore *store, FindSegmentsFile *fsf))
|
839
|
+
static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void (*run)(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir), FrtIndexReader *ir)
|
801
840
|
{
|
802
841
|
volatile int i;
|
803
842
|
volatile int gen_look_ahead_count = 0;
|
@@ -904,7 +943,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf,
|
|
904
943
|
last_gen = gen;
|
905
944
|
FRT_TRY
|
906
945
|
fsf->generation = gen;
|
907
|
-
run(store, fsf);
|
946
|
+
run(store, fsf, ir);
|
908
947
|
FRT_RETURN_EARLY();
|
909
948
|
return;
|
910
949
|
case FRT_IO_ERROR: case FRT_FILE_NOT_FOUND_ERROR: case FRT_EOF_ERROR:
|
@@ -950,7 +989,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf,
|
|
950
989
|
* prevSegmentFileName + "'" */
|
951
990
|
FRT_TRY
|
952
991
|
fsf->generation = gen - 1;
|
953
|
-
run(store, fsf);
|
992
|
+
run(store, fsf, ir);
|
954
993
|
/* TODO:LOG "success on fallback " +
|
955
994
|
* prev_seg_file_name */
|
956
995
|
|
@@ -1033,7 +1072,7 @@ void frt_sis_del_from_to(FrtSegmentInfos *sis, int from, int to)
|
|
1033
1072
|
}
|
1034
1073
|
}
|
1035
1074
|
|
1036
|
-
static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf)
|
1075
|
+
static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_)
|
1037
1076
|
{
|
1038
1077
|
int seg_cnt;
|
1039
1078
|
int i;
|
@@ -1072,7 +1111,7 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf)
|
|
1072
1111
|
/**
 * Read the segment infos of a store.
 *
 * sis_find_segments_file() is run with frt_sis_read_i as the callback and
 * no index reader (NULL); the result is taken from the finder's ret.sis.
 */
FrtSegmentInfos *frt_sis_read(FrtStore *store)
{
    FindSegmentsFile finder;
    FrtSegmentInfos *sis;

    sis_find_segments_file(store, &finder, frt_sis_read_i, NULL);
    sis = finder.ret.sis;
    return sis;
}
|
1078
1117
|
|
@@ -1112,7 +1151,7 @@ void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter)
|
|
1112
1151
|
}
|
1113
1152
|
}
|
1114
1153
|
|
1115
|
-
static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf)
|
1154
|
+
static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_)
|
1116
1155
|
{
|
1117
1156
|
FrtInStream *is;
|
1118
1157
|
frt_u64 version;
|
@@ -1135,7 +1174,7 @@ static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf)
|
|
1135
1174
|
/**
 * Read the current version number of a store.
 *
 * sis_find_segments_file() is run with frt_sis_read_ver_i as the callback
 * and no index reader (NULL); the result is taken from the finder's
 * ret.uint64.
 */
frt_u64 frt_sis_read_current_version(FrtStore *store)
{
    FindSegmentsFile finder;
    frt_u64 version;

    sis_find_segments_file(store, &finder, frt_sis_read_ver_i, NULL);
    version = finder.ret.uint64;
    return version;
}
|
1141
1180
|
|
@@ -1145,17 +1184,17 @@ frt_u64 frt_sis_read_current_version(FrtStore *store)
|
|
1145
1184
|
*
|
1146
1185
|
****************************************************************************/
|
1147
1186
|
|
1148
|
-
static FrtLazyDocField *lazy_df_new(
|
1149
|
-
{
|
1187
|
+
/**
 * Create a lazy document field.
 *
 * @param name         field name
 * @param size         number of FrtLazyDocFieldData slots to allocate;
 *                     the slots start out zeroed
 * @param compression  compression type recorded on the field
 * @return the new field, with its decompressed flag cleared
 */
static FrtLazyDocField *lazy_df_new(ID name, const int size, FrtCompressionType compression) {
    FrtLazyDocField *ldf = FRT_ALLOC(FrtLazyDocField);

    ldf->data = FRT_ALLOC_AND_ZERO_N(FrtLazyDocFieldData, size);
    ldf->size = size;
    ldf->name = name;
    ldf->compression = compression;
    ldf->decompressed = false;
    return ldf;
}
|
1156
1196
|
|
1157
|
-
static void lazy_df_destroy(FrtLazyDocField *self)
|
1158
|
-
{
|
1197
|
+
static void lazy_df_destroy(FrtLazyDocField *self) {
|
1159
1198
|
int i;
|
1160
1199
|
for (i = self->size - 1; i >= 0; i--) {
|
1161
1200
|
if (self->data[i].text) {
|
@@ -1166,25 +1205,246 @@ static void lazy_df_destroy(FrtLazyDocField *self)
|
|
1166
1205
|
free(self);
|
1167
1206
|
}
|
1168
1207
|
|
1169
|
-
|
1170
|
-
|
1208
|
+
static void comp_raise(void) {
|
1209
|
+
FRT_RAISE(EXCEPTION, "Compression error");
|
1210
|
+
}
|
1211
|
+
|
1212
|
+
/**
 * Read a Brotli-compressed value from a stream and return it decompressed.
 *
 * Input is pulled from `is` in chunks of at most FRT_COMPRESSION_BUFFER_SIZE
 * bytes. Before every decoder call the output buffer is grown by another
 * FRT_COMPRESSION_BUFFER_SIZE bytes, and the decoder is offered exactly that
 * new window.
 *
 * @param is             stream to read the compressed bytes from
 * @param compressed_len number of compressed bytes to consume from `is`
 * @param len            out: number of decompressed bytes, not counting the
 *                       '\0' appended by this function
 * @return buffer allocated with FRT_REALLOC_N that holds the decompressed
 *         bytes followed by '\0'; it is not freed here. NULL is returned
 *         only if comp_raise() hands control back.
 *
 * comp_raise() is called when the decoder cannot be created or reports an
 * error; the decoder state and the partial output are released first.
 *
 * NOTE(review): if the input runs out before the decoder reports success,
 * the bytes decoded so far are returned without an error - confirm this is
 * intended. NOTE(review): should frt_is_read_bytes() raise, the decoder state
 * and output buffer are not released here.
 */
static char *is_read_brotli_compressed_bytes(FrtInStream *is, int compressed_len, int *len) {
    int buf_out_idx = 0;
    int read_len;
    frt_uchar buf_in[FRT_COMPRESSION_BUFFER_SIZE];
    const frt_uchar *next_in;
    size_t available_in;
    frt_uchar *buf_out = NULL;
    frt_uchar *next_out;
    size_t available_out;

    BrotliDecoderState *b_state = BrotliDecoderCreateInstance(NULL, NULL, NULL);
    BrotliDecoderResult b_result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
    if (!b_state) { comp_raise(); return NULL; }

    do {
        /* fetch the next chunk of compressed input */
        read_len = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
        frt_is_read_bytes(is, buf_in, read_len);
        compressed_len -= read_len;
        available_in = read_len;
        next_in = buf_in;
        do {
            FRT_REALLOC_N(buf_out, frt_uchar, buf_out_idx + FRT_COMPRESSION_BUFFER_SIZE);
            next_out = buf_out + buf_out_idx;
            /* Offer the decoder the full window that was just reserved. This
             * has to be reset for every call: after NEEDS_MORE_OUTPUT the
             * previous window is exhausted, and reusing the stale count would
             * give the decoder no room while still advancing buf_out_idx. */
            available_out = FRT_COMPRESSION_BUFFER_SIZE;
            b_result = BrotliDecoderDecompressStream(b_state,
                                                     &available_in, &next_in,
                                                     &available_out, &next_out, NULL);
            if (b_result == BROTLI_DECODER_RESULT_ERROR) {
                /* release everything acquired so far before signalling */
                BrotliDecoderDestroyInstance(b_state);
                free(buf_out);
                comp_raise();
                return NULL;
            }
            /* advance by the number of bytes this call actually produced */
            buf_out_idx += (int)(FRT_COMPRESSION_BUFFER_SIZE - available_out);
        } while (b_result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT);
    } while (b_result != BROTLI_DECODER_RESULT_SUCCESS && compressed_len > 0);

    BrotliDecoderDestroyInstance(b_state);

    /* shrink (or allocate) to the exact size plus room for the terminator */
    FRT_REALLOC_N(buf_out, frt_uchar, buf_out_idx + 1);
    buf_out[buf_out_idx] = '\0';
    *len = buf_out_idx;
    return (char *)buf_out;
}
|
1251
|
+
|
1252
|
+
static void zraise(int ret) {
|
1253
|
+
switch (ret) {
|
1254
|
+
case BZ_IO_ERROR:
|
1255
|
+
if (ferror(stdin))
|
1256
|
+
FRT_RAISE(FRT_IO_ERROR, "bzlib: error reading stdin");
|
1257
|
+
if (ferror(stdout))
|
1258
|
+
FRT_RAISE(FRT_IO_ERROR, "bzlib: error writing stdout");
|
1259
|
+
break;
|
1260
|
+
case BZ_CONFIG_ERROR:
|
1261
|
+
FRT_RAISE(FRT_IO_ERROR, "bzlib: system configuration error");
|
1262
|
+
break;
|
1263
|
+
case BZ_SEQUENCE_ERROR: /* shouldn't occur if code is correct */
|
1264
|
+
FRT_RAISE(FRT_IO_ERROR, "bzlib: !!BUG!! sequence error");
|
1265
|
+
break;
|
1266
|
+
case BZ_PARAM_ERROR: /* shouldn't occur if code is correct */
|
1267
|
+
FRT_RAISE(FRT_IO_ERROR, "bzlib: !!BUG!! parameter error");
|
1268
|
+
break;
|
1269
|
+
case BZ_MEM_ERROR:
|
1270
|
+
FRT_RAISE(FRT_IO_ERROR, "bzlib: memory error");
|
1271
|
+
break;
|
1272
|
+
case BZ_DATA_ERROR:
|
1273
|
+
FRT_RAISE(FRT_IO_ERROR, "bzlib: data integrity check error");
|
1274
|
+
break;
|
1275
|
+
case BZ_DATA_ERROR_MAGIC:
|
1276
|
+
FRT_RAISE(FRT_IO_ERROR, "bzlib: data integrity check - non-matching magic");
|
1277
|
+
break;
|
1278
|
+
case BZ_UNEXPECTED_EOF:
|
1279
|
+
FRT_RAISE(FRT_IO_ERROR, "bzlib: unexpected end-of-file");
|
1280
|
+
break;
|
1281
|
+
case BZ_OUTBUFF_FULL:
|
1282
|
+
FRT_RAISE(FRT_IO_ERROR, "bzlib: output buffer full");
|
1283
|
+
break;
|
1284
|
+
default:
|
1285
|
+
FRT_RAISE(FRT_EXCEPTION, "bzlib: unknown error");
|
1286
|
+
}
|
1287
|
+
}
|
1288
|
+
|
1289
|
+
/*
 * Read `compressed_len` bytes of bzip2 data from `is` and decompress them.
 *
 * is             - input stream positioned at the start of the compressed data
 * compressed_len - number of compressed bytes to consume from the stream
 * len            - out: number of decompressed bytes (excluding the trailing NUL)
 *
 * Returns a heap buffer holding the decompressed bytes followed by a '\0'.
 * bzlib failures are reported through zraise() after the stream and the
 * output buffer have been released.
 */
static char *is_read_bz2_compressed_bytes(FrtInStream *is, int compressed_len, int *len) {
    int buf_out_idx = 0, ret, read_len;
    char *buf_out = NULL;
    char buf_in[FRT_COMPRESSION_BUFFER_SIZE];
    bz_stream zstrm;
    zstrm.bzalloc = NULL;
    zstrm.bzfree = NULL;
    zstrm.opaque = NULL;
    zstrm.next_in = NULL;
    zstrm.avail_in = 0;
    if ((ret = BZ2_bzDecompressInit(&zstrm, 0, 0)) != BZ_OK) zraise(ret);

    do {
        /* Feed the decompressor one input buffer at a time. */
        read_len = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
        frt_is_read_bytes(is, (frt_uchar *)buf_in, read_len);
        compressed_len -= read_len;
        zstrm.avail_in = read_len;
        zstrm.next_in = buf_in;

        do {
            /*
             * Grow the output and hand bzlib a fresh full-size window on
             * every pass. avail_out must be reset here; leaving it at 0
             * after a full buffer would make the next call a no-op and the
             * loop would never end.
             */
            FRT_REALLOC_N(buf_out, char, buf_out_idx + FRT_COMPRESSION_BUFFER_SIZE);
            zstrm.avail_out = FRT_COMPRESSION_BUFFER_SIZE;
            zstrm.next_out = buf_out + buf_out_idx;
            ret = BZ2_bzDecompress(&zstrm);
            assert(ret != BZ_SEQUENCE_ERROR); /* state not clobbered */
            if (ret != BZ_OK && ret != BZ_STREAM_END) {
                (void)BZ2_bzDecompressEnd(&zstrm);
                free(buf_out);
                zraise(ret);
                return NULL; /* only reached if zraise() returns */
            }
            buf_out_idx += FRT_COMPRESSION_BUFFER_SIZE - zstrm.avail_out;
            /* A completely filled window means more output may be pending,
             * unless the stream has already ended. */
        } while (zstrm.avail_out == 0 && ret != BZ_STREAM_END);
    } while (ret != BZ_STREAM_END && compressed_len != 0);

    (void)BZ2_bzDecompressEnd(&zstrm);

    /* Shrink to the exact size and NUL-terminate. */
    FRT_REALLOC_N(buf_out, char, buf_out_idx + 1);
    buf_out[buf_out_idx] = '\0';

    *len = buf_out_idx;
    return (char *)buf_out;
}
|
1330
|
+
|
1331
|
+
/*
 * Read `compressed_len` bytes holding one LZ4 frame from `is` and
 * decompress it.
 *
 * is             - input stream positioned at the start of the frame
 * compressed_len - number of compressed bytes to consume from the stream
 * length         - out: number of decompressed bytes (excluding the trailing
 *                  NUL), or -1 on failure
 *
 * Returns a heap buffer holding the decompressed bytes followed by a '\0',
 * or NULL on failure (LZ4 error, or input exhausted before the frame was
 * complete). All local resources are released on every path.
 */
static char *is_read_lz4_compressed_bytes(FrtInStream *is, int compressed_len, int *length) {
    frt_uchar buf_in[FRT_COMPRESSION_BUFFER_SIZE];
    const frt_uchar *src;
    const frt_uchar *src_end;
    char *buf_out = NULL;
    int dc_length = 0;
    int read_length;
    size_t header_size;
    size_t buf_out_length;
    size_t res;
    LZ4F_dctx *dctx = NULL;
    LZ4F_frameInfo_t frame_info;

    if (LZ4F_isError(LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION))) goto fail;

    /* The first chunk carries the frame header followed by data. */
    read_length = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
    frt_is_read_bytes(is, buf_in, read_length);
    compressed_len -= read_length;

    header_size = read_length;
    res = LZ4F_getFrameInfo(dctx, &frame_info, buf_in, &header_size);
    if (LZ4F_isError(res)) goto fail;

    /* Size each output window after the frame's maximum block size. */
    switch (frame_info.blockSizeID) {
        case LZ4F_max256KB:
            buf_out_length = (size_t)1 << 18;
            break;
        case LZ4F_max1MB:
            buf_out_length = (size_t)1 << 20;
            break;
        case LZ4F_max4MB:
            buf_out_length = (size_t)1 << 22;
            break;
        case LZ4F_default:
        case LZ4F_max64KB:
        default:
            /* Unknown IDs fall back to 64KB; a zero-sized window could
             * never make progress. */
            buf_out_length = (size_t)1 << 16;
            break;
    }

    /* Data starts right after the header bytes consumed above. */
    src = buf_in + header_size;
    src_end = buf_in + read_length;
    res = 1;

    /* LZ4F_decompress() returns 0 once the frame is fully decoded. */
    while (res != 0) {
        size_t dest_length = buf_out_length;
        size_t src_length;

        if (src == src_end) {
            /* Frame not finished but nothing left to read: truncated data. */
            if (compressed_len <= 0) goto fail;
            read_length = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
            frt_is_read_bytes(is, buf_in, read_length);
            compressed_len -= read_length;
            src = buf_in;
            src_end = buf_in + read_length;
        }

        /* Offer only the bytes that remain in the buffer, never more. */
        src_length = (size_t)(src_end - src);
        FRT_REALLOC_N(buf_out, char, dc_length + buf_out_length);
        res = LZ4F_decompress(dctx, buf_out + dc_length, &dest_length, src, &src_length, NULL);
        if (LZ4F_isError(res)) goto fail;
        dc_length += (int)dest_length;
        src += src_length;
    }

    LZ4F_freeDecompressionContext(dctx);

    /* Shrink to the exact size and NUL-terminate. */
    FRT_REALLOC_N(buf_out, char, dc_length + 1);
    buf_out[dc_length] = '\0';

    *length = dc_length;
    return buf_out;

fail:
    LZ4F_freeDecompressionContext(dctx); /* accepts NULL */
    free(buf_out);
    *length = -1;
    return NULL;
}
|
1403
|
+
|
1404
|
+
/*
 * Decompress `compressed_len` bytes read from `is` with the codec selected
 * by `compression`, storing the decompressed size in `*len`.
 *
 * Returns the decompressed buffer, or NULL (leaving `*len` untouched) when
 * the compression type is not one of the supported codecs.
 */
static char *is_read_compressed_bytes(FrtInStream *is, int compressed_len, int *len, FrtCompressionType compression) {
    if (compression == FRT_COMPRESSION_BROTLI) {
        return is_read_brotli_compressed_bytes(is, compressed_len, len);
    }
    if (compression == FRT_COMPRESSION_BZ2) {
        return is_read_bz2_compressed_bytes(is, compressed_len, len);
    }
    if (compression == FRT_COMPRESSION_LZ4) {
        return is_read_lz4_compressed_bytes(is, compressed_len, len);
    }
    return NULL;
}
|
1416
|
+
|
1417
|
+
/*
 * Return the text of entry `i` of a lazy field, loading it on first access.
 *
 * Returns NULL when `i` is out of range. Otherwise the cached text is
 * returned if present; if not, the stream is positioned at the entry's
 * start and the bytes are either decompressed or read verbatim, cached on
 * the entry, and returned. Decompression also overwrites the entry's length
 * with the decompressed size.
 */
char *frt_lazy_df_get_data(FrtLazyDocField *self, int i) {
    FrtLazyDocFieldData *entry;
    int stored_len;

    if (i < 0 || i >= self->size) {
        return NULL;
    }

    entry = &self->data[i];
    if (entry->text != NULL) {
        return entry->text;
    }

    /* The stored length excludes one trailing byte that is read as well. */
    stored_len = entry->length + 1;
    frt_is_seek(self->doc->fields_in, entry->start);

    if (entry->compression == FRT_COMPRESSION_NONE) {
        char *raw = FRT_ALLOC_N(char, stored_len);
        entry->text = raw;
        frt_is_read_bytes(self->doc->fields_in, (frt_uchar *)raw, stored_len);
        raw[stored_len - 1] = '\0';
    } else {
        entry->text = is_read_compressed_bytes(self->doc->fields_in, stored_len, &(entry->length), entry->compression);
    }

    return entry->text;
}
|
1185
1436
|
|
1186
|
-
void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf, int start, int len)
|
1187
|
-
{
|
1437
|
+
void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf, int start, int len) {
|
1438
|
+
if (self->compression != FRT_COMPRESSION_NONE && !self->decompressed) {
|
1439
|
+
int i;
|
1440
|
+
self->len = 0;
|
1441
|
+
for (i = self->size-1; i >= 0; i--) {
|
1442
|
+
(void)frt_lazy_df_get_data(self, i);
|
1443
|
+
self->len += self->data[i].length + 1;
|
1444
|
+
}
|
1445
|
+
self->len--; /* each field separated by ' ' but no need to add to end */
|
1446
|
+
self->decompressed = true;
|
1447
|
+
}
|
1188
1448
|
if (start < 0 || start >= self->len) {
|
1189
1449
|
FRT_RAISE(FRT_IO_ERROR, "start out of range in LazyDocField#get_bytes. %d "
|
1190
1450
|
"is not between 0 and %d", start, self->len);
|
@@ -1196,7 +1456,33 @@ void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf, int start, int len)
|
|
1196
1456
|
FRT_RAISE(FRT_IO_ERROR, "Tried to read past end of field. Field is only %d "
|
1197
1457
|
"bytes long but tried to read to %d", self->len, start + len);
|
1198
1458
|
}
|
1199
|
-
|
1459
|
+
if (self->compression != FRT_COMPRESSION_NONE) {
|
1460
|
+
int cur_start = 0, buf_start = 0, cur_end, i, copy_start, copy_len;
|
1461
|
+
for (i = 0; i < self->size; i++) {
|
1462
|
+
cur_end = cur_start + self->data[i].length;
|
1463
|
+
if (start < cur_end) {
|
1464
|
+
copy_start = start > cur_start ? start - cur_start : 0;
|
1465
|
+
copy_len = cur_end - cur_start - copy_start;
|
1466
|
+
if (copy_len >= len) {
|
1467
|
+
copy_len = len;
|
1468
|
+
len = 0;
|
1469
|
+
}
|
1470
|
+
else {
|
1471
|
+
len -= copy_len;
|
1472
|
+
}
|
1473
|
+
memcpy(buf + buf_start,
|
1474
|
+
self->data[i].text + copy_start,
|
1475
|
+
copy_len);
|
1476
|
+
buf_start += copy_len;
|
1477
|
+
if (len > 0) {
|
1478
|
+
buf[buf_start++] = ' ';
|
1479
|
+
len--;
|
1480
|
+
}
|
1481
|
+
if (len == 0) break;
|
1482
|
+
}
|
1483
|
+
cur_start = cur_end + 1;
|
1484
|
+
}
|
1485
|
+
} else {
|
1200
1486
|
frt_is_seek(self->doc->fields_in, self->data[0].start + start);
|
1201
1487
|
frt_is_read_bytes(self->doc->fields_in, (frt_uchar *)buf, len);
|
1202
1488
|
}
|
@@ -1234,21 +1520,17 @@ static void lazy_doc_add_field(FrtLazyDoc *self, FrtLazyDocField *lazy_df, int i
|
|
1234
1520
|
lazy_df->doc = self;
|
1235
1521
|
}
|
1236
1522
|
|
1237
|
-
FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self,
|
1238
|
-
{
|
1523
|
+
FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self, ID field) {
|
1239
1524
|
return (FrtLazyDocField *)frt_h_get(self->field_dictionary, (void *)field);
|
1240
1525
|
}
|
1241
1526
|
|
1242
1527
|
/****************************************************************************
|
1243
|
-
*
|
1244
1528
|
* FrtFieldsReader
|
1245
|
-
*
|
1246
1529
|
****************************************************************************/
|
1247
1530
|
|
1248
1531
|
#define FIELDS_IDX_PTR_SIZE 12
|
1249
1532
|
|
1250
|
-
FrtFieldsReader *frt_fr_open(FrtStore *store, const char *segment, FrtFieldInfos *fis)
|
1251
|
-
{
|
1533
|
+
FrtFieldsReader *frt_fr_open(FrtStore *store, const char *segment, FrtFieldInfos *fis) {
|
1252
1534
|
FrtFieldsReader *fr = FRT_ALLOC(FrtFieldsReader);
|
1253
1535
|
FrtInStream *fdx_in;
|
1254
1536
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
@@ -1268,8 +1550,7 @@ FrtFieldsReader *frt_fr_open(FrtStore *store, const char *segment, FrtFieldInfos
|
|
1268
1550
|
return fr;
|
1269
1551
|
}
|
1270
1552
|
|
1271
|
-
FrtFieldsReader *frt_fr_clone(FrtFieldsReader *orig)
|
1272
|
-
{
|
1553
|
+
FrtFieldsReader *frt_fr_clone(FrtFieldsReader *orig) {
|
1273
1554
|
FrtFieldsReader *fr = FRT_ALLOC(FrtFieldsReader);
|
1274
1555
|
|
1275
1556
|
memcpy(fr, orig, sizeof(FrtFieldsReader));
|
@@ -1279,25 +1560,36 @@ FrtFieldsReader *frt_fr_clone(FrtFieldsReader *orig)
|
|
1279
1560
|
return fr;
|
1280
1561
|
}
|
1281
1562
|
|
1282
|
-
/*
 * Close a fields reader: close its data stream, then its index stream, and
 * finally release the reader structure itself.
 */
void frt_fr_close(FrtFieldsReader *fr)
{
    frt_is_close(fr->fdt_in);   /* field data stream */
    frt_is_close(fr->fdx_in);   /* field index stream */

    free(fr);
}
|
1288
1568
|
|
1289
|
-
static FrtDocField *frt_fr_df_new(
|
1290
|
-
{
|
1569
|
+
static FrtDocField *frt_fr_df_new(ID name, int size, FrtCompressionType compression) {
|
1291
1570
|
FrtDocField *df = FRT_ALLOC(FrtDocField);
|
1292
1571
|
df->name = name;
|
1293
1572
|
df->capa = df->size = size;
|
1294
1573
|
df->data = FRT_ALLOC_N(char *, df->capa);
|
1295
1574
|
df->lengths = FRT_ALLOC_N(int, df->capa);
|
1575
|
+
df->encodings = FRT_ALLOC_N(rb_encoding *, df->capa);
|
1296
1576
|
df->destroy_data = true;
|
1297
1577
|
df->boost = 1.0f;
|
1578
|
+
df->compression = compression;
|
1298
1579
|
return df;
|
1299
1580
|
}
|
1300
1581
|
|
1582
|
+
/*
 * Decompress every value of `df` from the reader's data stream.
 *
 * On entry each lengths[] slot holds the stored length, and one more byte
 * than that is consumed from the stream. The decompressor overwrites the
 * slot with the decompressed length, and the resulting buffer is stored in
 * the matching data[] slot.
 */
static void frt_fr_read_compressed_fields(FrtFieldsReader *fr, FrtDocField *df, FrtCompressionType compression) {
    FrtInStream *in = fr->fdt_in;
    const int value_cnt = df->size;
    int idx = 0;

    while (idx < value_cnt) {
        int *len_slot = &(df->lengths[idx]);
        const int stored_len = *len_slot + 1;

        df->data[idx] = is_read_compressed_bytes(in, stored_len, len_slot, compression);
        idx++;
    }
}
|
1592
|
+
|
1301
1593
|
FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
|
1302
1594
|
{
|
1303
1595
|
int i, j;
|
@@ -1316,22 +1608,28 @@ FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
|
|
1316
1608
|
const int field_num = frt_is_read_vint(fdt_in);
|
1317
1609
|
FrtFieldInfo *fi = fr->fis->fields[field_num];
|
1318
1610
|
const int df_size = frt_is_read_vint(fdt_in);
|
1319
|
-
FrtDocField *df = frt_fr_df_new(fi->name, df_size);
|
1611
|
+
FrtDocField *df = frt_fr_df_new(fi->name, df_size, frt_fi_get_compression(fi));
|
1320
1612
|
|
1321
1613
|
for (j = 0; j < df_size; j++) {
|
1322
1614
|
df->lengths[j] = frt_is_read_vint(fdt_in);
|
1615
|
+
df->encodings[j] = rb_enc_from_index(frt_is_read_vint(fdt_in));
|
1616
|
+
df->compression = frt_is_read_vint(fdt_in);
|
1323
1617
|
}
|
1324
1618
|
|
1325
1619
|
frt_doc_add_field(doc, df);
|
1326
1620
|
}
|
1327
1621
|
for (i = 0; i < stored_cnt; i++) {
|
1328
1622
|
FrtDocField *df = doc->fields[i];
|
1329
|
-
|
1330
|
-
|
1331
|
-
|
1332
|
-
|
1333
|
-
|
1334
|
-
|
1623
|
+
if (df->compression != FRT_COMPRESSION_NONE) {
|
1624
|
+
frt_fr_read_compressed_fields(fr, df, df->compression);
|
1625
|
+
} else {
|
1626
|
+
const int df_size = df->size;
|
1627
|
+
for (j = 0; j < df_size; j++) {
|
1628
|
+
const int read_len = df->lengths[j] + 1;
|
1629
|
+
df->data[j] = FRT_ALLOC_N(char, read_len);
|
1630
|
+
frt_is_read_bytes(fdt_in, (frt_uchar *)df->data[j], read_len);
|
1631
|
+
df->data[j][read_len - 1] = '\0';
|
1632
|
+
}
|
1335
1633
|
}
|
1336
1634
|
}
|
1337
1635
|
|
@@ -1347,31 +1645,37 @@ FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num)
|
|
1347
1645
|
FrtLazyDoc *lazy_doc;
|
1348
1646
|
FrtInStream *fdx_in = fr->fdx_in;
|
1349
1647
|
FrtInStream *fdt_in = fr->fdt_in;
|
1648
|
+
|
1350
1649
|
frt_is_seek(fdx_in, doc_num * FIELDS_IDX_PTR_SIZE);
|
1351
1650
|
pos = (off_t)frt_is_read_u64(fdx_in);
|
1352
1651
|
frt_is_seek(fdt_in, pos);
|
1353
1652
|
stored_cnt = frt_is_read_vint(fdt_in);
|
1653
|
+
|
1354
1654
|
lazy_doc = lazy_doc_new(stored_cnt, fdt_in);
|
1355
1655
|
for (i = 0; i < stored_cnt; i++) {
|
1356
1656
|
FrtFieldInfo *fi = fr->fis->fields[frt_is_read_vint(fdt_in)];
|
1357
|
-
const int
|
1358
|
-
FrtLazyDocField *lazy_df = lazy_df_new(fi->name,
|
1657
|
+
const int df_size = frt_is_read_vint(fdt_in);
|
1658
|
+
FrtLazyDocField *lazy_df = lazy_df_new(fi->name, df_size, frt_fi_get_compression(fi));
|
1359
1659
|
const int field_start = start;
|
1360
1660
|
/* get the starts relative positions this time around */
|
1361
|
-
|
1661
|
+
|
1662
|
+
for (j = 0; j < df_size; j++) {
|
1362
1663
|
lazy_df->data[j].start = start;
|
1363
1664
|
start += 1 + (lazy_df->data[j].length = frt_is_read_vint(fdt_in));
|
1665
|
+
lazy_df->data[j].encoding = rb_enc_from_index(frt_is_read_vint(fdt_in));
|
1666
|
+
lazy_df->data[j].compression = frt_is_read_vint(fdt_in);
|
1364
1667
|
}
|
1668
|
+
|
1365
1669
|
lazy_df->len = start - field_start - 1;
|
1366
1670
|
lazy_doc_add_field(lazy_doc, lazy_df, i);
|
1367
1671
|
}
|
1368
1672
|
/* correct the starts to their correct absolute positions */
|
1673
|
+
const off_t abs_start = frt_is_pos(fdt_in);
|
1369
1674
|
for (i = 0; i < stored_cnt; i++) {
|
1370
1675
|
FrtLazyDocField *lazy_df = lazy_doc->fields[i];
|
1371
|
-
const int
|
1372
|
-
|
1373
|
-
|
1374
|
-
lazy_df->data[j].start += start;
|
1676
|
+
const int df_size = lazy_df->size;
|
1677
|
+
for (j = 0; j < df_size; j++) {
|
1678
|
+
lazy_df->data[j].start += abs_start;
|
1375
1679
|
}
|
1376
1680
|
}
|
1377
1681
|
|
@@ -1549,11 +1853,150 @@ void frt_fw_close(FrtFieldsWriter *fw)
|
|
1549
1853
|
free(fw);
|
1550
1854
|
}
|
1551
1855
|
|
1552
|
-
|
1553
|
-
|
1856
|
+
/*
 * Brotli-compress `length` bytes of `data` and write the result to
 * `out_stream`.
 *
 * Returns the total number of compressed bytes reported by the encoder.
 * If the encoder cannot be created or a compression step fails,
 * comp_raise() is called; -1 is returned should that call come back.
 */
static int frt_os_write_brotli_compressed_bytes(FrtOutStream* out_stream, frt_uchar *data, int length) {
    frt_uchar chunk[FRT_COMPRESSION_BUFFER_SIZE];
    const frt_uchar *in_cursor = data;
    frt_uchar *out_cursor;
    size_t in_left = length;
    size_t out_left;
    size_t total_out = 0;
    BrotliEncoderState *encoder = BrotliEncoderCreateInstance(NULL, NULL, NULL);

    if (!encoder) {
        comp_raise();
        return -1;
    }

    BrotliEncoderSetParameter(encoder, BROTLI_PARAM_QUALITY, FRT_BROTLI_COMPRESSION_LEVEL);

    /* All input is available up front, so every call asks the encoder to
     * finish; the loop only drains its output one chunk at a time. */
    for (;;) {
        out_left = FRT_COMPRESSION_BUFFER_SIZE;
        out_cursor = chunk;

        if (!BrotliEncoderCompressStream(encoder, BROTLI_OPERATION_FINISH,
                                         &in_left, &in_cursor,
                                         &out_left, &out_cursor, &total_out)) {
            BrotliEncoderDestroyInstance(encoder);
            comp_raise();
            return -1;
        }

        frt_os_write_bytes(out_stream, chunk, FRT_COMPRESSION_BUFFER_SIZE - out_left);

        if (BrotliEncoderIsFinished(encoder)) {
            break;
        }
    }

    BrotliEncoderDestroyInstance(encoder);

    return (int)total_out;
}
|
1885
|
+
|
1886
|
+
/*
 * bzip2-compress `length` bytes of `data` and write the result to
 * `out_stream`.
 *
 * Returns the number of compressed bytes written. bzlib failures are
 * reported through zraise(); -1 is returned should that call come back.
 */
static int frt_os_write_bz2_compressed_bytes(FrtOutStream* out_stream, frt_uchar *data, int length) {
    int ret, buf_size, compressed_len = 0;
    char out_buffer[FRT_COMPRESSION_BUFFER_SIZE];
    bz_stream zstrm;
    zstrm.bzalloc = NULL;
    zstrm.bzfree = NULL;
    zstrm.opaque = NULL;
    if ((ret = BZ2_bzCompressInit(&zstrm, FRT_BZIP_COMPRESSION_LEVEL, 0, 0)) != BZ_OK) zraise(ret);

    /* All input is handed over at once; the loop only drains output. */
    zstrm.avail_in = length;
    zstrm.next_in = (char *)data;

    do {
        /*
         * Point bzlib back at the start of the scratch buffer on every
         * pass. Without this, a full buffer leaves avail_out at 0 and
         * next_out past the end, so later calls produce nothing while the
         * same bytes are written and counted again.
         */
        zstrm.avail_out = FRT_COMPRESSION_BUFFER_SIZE;
        zstrm.next_out = out_buffer;

        ret = BZ2_bzCompress(&zstrm, BZ_FINISH);
        assert(ret != BZ_SEQUENCE_ERROR); /* state not clobbered */
        if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END) {
            (void)BZ2_bzCompressEnd(&zstrm);
            zraise(ret);
            return -1; /* only reached if zraise() returns */
        }

        buf_size = FRT_COMPRESSION_BUFFER_SIZE - zstrm.avail_out;
        compressed_len += buf_size;
        frt_os_write_bytes(out_stream, (frt_uchar *)out_buffer, buf_size);
        /* BZ_FINISH must be repeated until bzlib reports the stream end. */
    } while (ret != BZ_STREAM_END);
    assert(zstrm.avail_in == 0); /* all input will be used */

    (void)BZ2_bzCompressEnd(&zstrm);
    return compressed_len;
}
|
1911
|
+
|
1912
|
+
static const LZ4F_preferences_t lz4_prefs = {
|
1913
|
+
{
|
1914
|
+
LZ4F_default,
|
1915
|
+
LZ4F_blockLinked,
|
1916
|
+
LZ4F_noContentChecksum,
|
1917
|
+
LZ4F_frame,
|
1918
|
+
0, /* unknown content size */
|
1919
|
+
0, /* no dictID */
|
1920
|
+
LZ4F_noBlockChecksum
|
1921
|
+
},
|
1922
|
+
0,
|
1923
|
+
1,
|
1924
|
+
1,
|
1925
|
+
{0,0,0}
|
1926
|
+
};
|
1927
|
+
|
1928
|
+
/*
 * Compress `length` bytes of `data` into one LZ4 frame and write it to
 * `out_stream`.
 *
 * The input is fed to LZ4 in pieces of at most FRT_COMPRESSION_BUFFER_SIZE
 * bytes, and the scratch buffer is sized with LZ4F_compressBound() for such
 * a piece.
 *
 * Returns the number of compressed bytes written, or -1 if any LZ4 call
 * fails. The compression context and scratch buffer are released on every
 * path.
 */
static int frt_os_write_lz4_compressed_bytes(FrtOutStream* out_stream, frt_uchar *data, int length) {
    int compressed_length = 0;
    int remaining_length = length;
    size_t ccmp_length = 0;
    /* Start at NULL so the shared cleanup below is safe even when context
     * creation itself fails. */
    LZ4F_compressionContext_t ctx = NULL;
    size_t out_buf_length = LZ4F_compressBound(FRT_COMPRESSION_BUFFER_SIZE, &lz4_prefs);
    frt_uchar *out_buf = frt_ecalloc(out_buf_length);

    size_t ctx_creation = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
    if (LZ4F_isError(ctx_creation)) {
        compressed_length = -1;
        goto finish;
    }

    /* create header */
    ccmp_length = LZ4F_compressBegin(ctx, out_buf, out_buf_length, &lz4_prefs);
    if (LZ4F_isError(ccmp_length)) {
        compressed_length = -1;
        goto finish;
    }
    compressed_length = ccmp_length;
    frt_os_write_bytes(out_stream, out_buf, ccmp_length);

    /* compress data */
    do {
        int read_length = (FRT_COMPRESSION_BUFFER_SIZE > remaining_length) ? remaining_length : FRT_COMPRESSION_BUFFER_SIZE;
        ccmp_length = LZ4F_compressUpdate(ctx, out_buf, out_buf_length, data + (length - remaining_length), read_length, NULL);
        if (LZ4F_isError(ccmp_length)) {
            compressed_length = -1;
            goto finish;
        }
        frt_os_write_bytes(out_stream, out_buf, ccmp_length);
        compressed_length += ccmp_length;
        remaining_length -= read_length;
    } while (remaining_length > 0);

    /* finish up: flush whatever is buffered and write the frame end */
    ccmp_length = LZ4F_compressEnd(ctx, out_buf, out_buf_length, NULL);
    if (LZ4F_isError(ccmp_length)) {
        compressed_length = -1;
        goto finish;
    }

    frt_os_write_bytes(out_stream, out_buf, ccmp_length);
    compressed_length += ccmp_length;

finish:
    LZ4F_freeCompressionContext(ctx);
    free(out_buf);

    return compressed_length;
}
|
1980
|
+
|
1981
|
+
/*
 * Compress `length` bytes of `data` with the codec selected by
 * `compression` and write the result to `out_stream`.
 *
 * Returns the value of the codec-specific writer, or -1 when the
 * compression type is not one of the supported codecs.
 */
static int frt_os_write_compressed_bytes(FrtOutStream* out_stream, frt_uchar *data, int length, FrtCompressionType compression) {
    if (compression == FRT_COMPRESSION_BROTLI) {
        return frt_os_write_brotli_compressed_bytes(out_stream, data, length);
    }
    if (compression == FRT_COMPRESSION_BZ2) {
        return frt_os_write_bz2_compressed_bytes(out_stream, data, length);
    }
    if (compression == FRT_COMPRESSION_LZ4) {
        return frt_os_write_lz4_compressed_bytes(out_stream, data, length);
    }
    return -1;
}
|
1994
|
+
|
1995
|
+
void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc) {
|
1554
1996
|
int i, j, stored_cnt = 0;
|
1555
1997
|
FrtDocField *df;
|
1556
1998
|
FrtFieldInfo *fi;
|
1999
|
+
FrtCompressionType compression;
|
1557
2000
|
FrtOutStream *fdt_out = fw->fdt_out, *fdx_out = fw->fdx_out;
|
1558
2001
|
const int doc_size = doc->size;
|
1559
2002
|
|
@@ -1577,13 +2020,26 @@ void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc)
|
|
1577
2020
|
const int df_size = df->size;
|
1578
2021
|
frt_os_write_vint(fdt_out, fi->number);
|
1579
2022
|
frt_os_write_vint(fdt_out, df_size);
|
1580
|
-
|
1581
|
-
|
1582
|
-
|
1583
|
-
|
1584
|
-
|
1585
|
-
*
|
1586
|
-
|
2023
|
+
|
2024
|
+
if (fi_is_compressed(fi)) {
|
2025
|
+
compression = frt_fi_get_compression(fi);
|
2026
|
+
for (j = 0; j < df_size; j++) {
|
2027
|
+
const int length = df->lengths[j];
|
2028
|
+
int compressed_len = frt_os_write_compressed_bytes(fw->buffer, (frt_uchar*)df->data[j], length, compression);
|
2029
|
+
frt_os_write_vint(fdt_out, compressed_len - 1);
|
2030
|
+
frt_os_write_vint(fdt_out, rb_enc_to_index(df->encodings[j]));
|
2031
|
+
frt_os_write_vint(fdt_out, compression);
|
2032
|
+
}
|
2033
|
+
} else {
|
2034
|
+
for (j = 0; j < df_size; j++) {
|
2035
|
+
const int length = df->lengths[j];
|
2036
|
+
frt_os_write_vint(fdt_out, length);
|
2037
|
+
frt_os_write_vint(fdt_out, rb_enc_to_index(df->encodings[j]));
|
2038
|
+
frt_os_write_vint(fdt_out, FRT_COMPRESSION_NONE);
|
2039
|
+
frt_os_write_bytes(fw->buffer, (frt_uchar*)df->data[j], length);
|
2040
|
+
/* leave a space between fields as that is how they are analyzed */
|
2041
|
+
frt_os_write_byte(fw->buffer, ' ');
|
2042
|
+
}
|
1587
2043
|
}
|
1588
2044
|
}
|
1589
2045
|
}
|
@@ -1938,8 +2394,7 @@ static char *ste_scan_to(FrtTermEnum *te, const char *term)
|
|
1938
2394
|
}
|
1939
2395
|
}
|
1940
2396
|
|
1941
|
-
static FrtSegmentTermEnum *ste_allocate()
|
1942
|
-
{
|
2397
|
+
static FrtSegmentTermEnum *ste_allocate(void) {
|
1943
2398
|
FrtSegmentTermEnum *ste = FRT_ALLOC_AND_ZERO(FrtSegmentTermEnum);
|
1944
2399
|
|
1945
2400
|
TE(ste)->next = &ste_next;
|
@@ -1964,7 +2419,6 @@ void frt_ste_close(FrtTermEnum *te)
|
|
1964
2419
|
free(te);
|
1965
2420
|
}
|
1966
2421
|
|
1967
|
-
|
1968
2422
|
static char *frt_ste_get_term(FrtTermEnum *te, int pos)
|
1969
2423
|
{
|
1970
2424
|
FrtSegmentTermEnum *ste = STE(te);
|
@@ -2079,9 +2533,7 @@ static void tew_destroy(TermEnumWrapper *tew)
|
|
2079
2533
|
tew->te->close(tew->te);
|
2080
2534
|
}
|
2081
2535
|
|
2082
|
-
static TermEnumWrapper *tew_setup(TermEnumWrapper *tew, int index, FrtTermEnum *te,
|
2083
|
-
FrtIndexReader *ir)
|
2084
|
-
{
|
2536
|
+
static TermEnumWrapper *tew_setup(TermEnumWrapper *tew, int index, FrtTermEnum *te, FrtIndexReader *ir) {
|
2085
2537
|
tew->index = index;
|
2086
2538
|
tew->ir = ir;
|
2087
2539
|
tew->te = te;
|
@@ -2090,9 +2542,7 @@ static TermEnumWrapper *tew_setup(TermEnumWrapper *tew, int index, FrtTermEnum *
|
|
2090
2542
|
return tew;
|
2091
2543
|
}
|
2092
2544
|
|
2093
|
-
|
2094
|
-
static char *mte_next(FrtTermEnum *te)
|
2095
|
-
{
|
2545
|
+
static char *mte_next(FrtTermEnum *te) {
|
2096
2546
|
TermEnumWrapper *top =
|
2097
2547
|
(TermEnumWrapper *)frt_pq_top(MTE(te)->tew_queue);
|
2098
2548
|
|
@@ -2122,8 +2572,7 @@ static char *mte_next(FrtTermEnum *te)
|
|
2122
2572
|
return te->curr_term;
|
2123
2573
|
}
|
2124
2574
|
|
2125
|
-
static FrtTermEnum *mte_set_field(FrtTermEnum *te, int field_num)
|
2126
|
-
{
|
2575
|
+
static FrtTermEnum *mte_set_field(FrtTermEnum *te, int field_num) {
|
2127
2576
|
MultiTermEnum *mte = MTE(te);
|
2128
2577
|
int i;
|
2129
2578
|
const int size = mte->size;
|
@@ -2151,8 +2600,7 @@ static FrtTermEnum *mte_set_field(FrtTermEnum *te, int field_num)
|
|
2151
2600
|
return te;
|
2152
2601
|
}
|
2153
2602
|
|
2154
|
-
static char *mte_skip_to(FrtTermEnum *te, const char *term)
|
2155
|
-
{
|
2603
|
+
static char *mte_skip_to(FrtTermEnum *te, const char *term) {
|
2156
2604
|
MultiTermEnum *mte = MTE(te);
|
2157
2605
|
int i;
|
2158
2606
|
const int size = mte->size;
|
@@ -2168,8 +2616,7 @@ static char *mte_skip_to(FrtTermEnum *te, const char *term)
|
|
2168
2616
|
return mte_next(te);
|
2169
2617
|
}
|
2170
2618
|
|
2171
|
-
static void mte_close(FrtTermEnum *te)
|
2172
|
-
{
|
2619
|
+
static void mte_close(FrtTermEnum *te) {
|
2173
2620
|
int i;
|
2174
2621
|
const int size = MTE(te)->size;
|
2175
2622
|
for (i = 0; i < size; i++) {
|
@@ -2182,10 +2629,9 @@ static void mte_close(FrtTermEnum *te)
|
|
2182
2629
|
free(te);
|
2183
2630
|
}
|
2184
2631
|
|
2185
|
-
FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *term)
|
2186
|
-
|
2187
|
-
|
2188
|
-
int r_cnt = mr->r_cnt;
|
2632
|
+
FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *term) {
|
2633
|
+
FrtIndexReader **readers = mr->sub_readers;
|
2634
|
+
int r_cnt = mr->r_cnt;
|
2189
2635
|
int i;
|
2190
2636
|
FrtIndexReader *reader;
|
2191
2637
|
MultiTermEnum *mte = FRT_ALLOC_AND_ZERO(MultiTermEnum);
|
@@ -2213,8 +2659,7 @@ FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *term)
|
|
2213
2659
|
|
2214
2660
|
if (NULL != term) {
|
2215
2661
|
sub_te = reader->terms_from(reader, fnum, term);
|
2216
|
-
}
|
2217
|
-
else {
|
2662
|
+
} else {
|
2218
2663
|
sub_te = reader->terms(reader, fnum);
|
2219
2664
|
}
|
2220
2665
|
|
@@ -2223,8 +2668,7 @@ FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *term)
|
|
2223
2668
|
|| (tew->term && (tew->term[0] != '\0'))) {
|
2224
2669
|
frt_pq_push(mte->tew_queue, tew); /* initialize queue */
|
2225
2670
|
}
|
2226
|
-
}
|
2227
|
-
else {
|
2671
|
+
} else {
|
2228
2672
|
/* add the term_enum_wrapper just in case */
|
2229
2673
|
sub_te = reader->terms(reader, 0);
|
2230
2674
|
sub_te->field_num = -1;
|
@@ -2246,9 +2690,7 @@ FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *term)
|
|
2246
2690
|
*
|
2247
2691
|
****************************************************************************/
|
2248
2692
|
|
2249
|
-
FrtTermInfosReader *frt_tir_open(FrtStore *store,
|
2250
|
-
FrtSegmentFieldIndex *sfi, const char *segment)
|
2251
|
-
{
|
2693
|
+
FrtTermInfosReader *frt_tir_open(FrtStore *store, FrtSegmentFieldIndex *sfi, const char *segment) {
|
2252
2694
|
FrtTermInfosReader *tir = FRT_ALLOC(FrtTermInfosReader);
|
2253
2695
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
2254
2696
|
|
@@ -2261,8 +2703,7 @@ FrtTermInfosReader *frt_tir_open(FrtStore *store,
|
|
2261
2703
|
return tir;
|
2262
2704
|
}
|
2263
2705
|
|
2264
|
-
static FrtTermEnum *tir_enum(FrtTermInfosReader *tir)
|
2265
|
-
{
|
2706
|
+
static FrtTermEnum *tir_enum(FrtTermInfosReader *tir) {
|
2266
2707
|
FrtTermEnum *te;
|
2267
2708
|
if (NULL == (te = (FrtTermEnum *)frt_thread_getspecific(tir->thread_te))) {
|
2268
2709
|
te = frt_ste_clone(tir->orig_te);
|
@@ -2273,8 +2714,7 @@ static FrtTermEnum *tir_enum(FrtTermInfosReader *tir)
|
|
2273
2714
|
return te;
|
2274
2715
|
}
|
2275
2716
|
|
2276
|
-
FrtTermInfosReader *frt_tir_set_field(FrtTermInfosReader *tir, int field_num)
|
2277
|
-
{
|
2717
|
+
FrtTermInfosReader *frt_tir_set_field(FrtTermInfosReader *tir, int field_num) {
|
2278
2718
|
if (field_num != tir->field_num) {
|
2279
2719
|
ste_set_field(tir_enum(tir), field_num);
|
2280
2720
|
tir->field_num = field_num;
|
@@ -2282,8 +2722,7 @@ FrtTermInfosReader *frt_tir_set_field(FrtTermInfosReader *tir, int field_num)
|
|
2282
2722
|
return tir;
|
2283
2723
|
}
|
2284
2724
|
|
2285
|
-
FrtTermInfo *frt_tir_get_ti(FrtTermInfosReader *tir, const char *term)
|
2286
|
-
{
|
2725
|
+
FrtTermInfo *frt_tir_get_ti(FrtTermInfosReader *tir, const char *term) {
|
2287
2726
|
FrtTermEnum *te = tir_enum(tir);
|
2288
2727
|
char *match;
|
2289
2728
|
|
@@ -2294,9 +2733,7 @@ FrtTermInfo *frt_tir_get_ti(FrtTermInfosReader *tir, const char *term)
|
|
2294
2733
|
return NULL;
|
2295
2734
|
}
|
2296
2735
|
|
2297
|
-
static FrtTermInfo *tir_get_ti_field(FrtTermInfosReader *tir, int field_num,
|
2298
|
-
const char *term)
|
2299
|
-
{
|
2736
|
+
static FrtTermInfo *tir_get_ti_field(FrtTermInfosReader *tir, int field_num, const char *term) {
|
2300
2737
|
FrtTermEnum *te = tir_enum(tir);
|
2301
2738
|
char *match;
|
2302
2739
|
|
@@ -2312,19 +2749,16 @@ static FrtTermInfo *tir_get_ti_field(FrtTermInfosReader *tir, int field_num,
|
|
2312
2749
|
return NULL;
|
2313
2750
|
}
|
2314
2751
|
|
2315
|
-
char *frt_tir_get_term(FrtTermInfosReader *tir, int pos)
|
2316
|
-
{
|
2752
|
+
char *frt_tir_get_term(FrtTermInfosReader *tir, int pos) {
|
2317
2753
|
if (pos < 0) {
|
2318
2754
|
return NULL;
|
2319
|
-
}
|
2320
|
-
else {
|
2755
|
+
} else {
|
2321
2756
|
return frt_ste_get_term(tir_enum(tir), pos);
|
2322
2757
|
}
|
2323
2758
|
}
|
2324
2759
|
|
2325
2760
|
|
2326
|
-
void frt_tir_close(FrtTermInfosReader *tir)
|
2327
|
-
{
|
2761
|
+
void frt_tir_close(FrtTermInfosReader *tir) {
|
2328
2762
|
frt_ary_destroy(tir->te_bucket, (frt_free_ft)&frt_ste_close);
|
2329
2763
|
frt_ste_close(tir->orig_te);
|
2330
2764
|
|
@@ -2341,25 +2775,19 @@ void frt_tir_close(FrtTermInfosReader *tir)
|
|
2341
2775
|
*
|
2342
2776
|
****************************************************************************/
|
2343
2777
|
|
2344
|
-
static FrtTermWriter *tw_new(FrtStore *store, char *file_name)
|
2345
|
-
{
|
2778
|
+
static FrtTermWriter *tw_new(FrtStore *store, char *file_name) {
|
2346
2779
|
FrtTermWriter *tw = FRT_ALLOC_AND_ZERO(FrtTermWriter);
|
2347
2780
|
tw->os = store->new_output(store, file_name);
|
2348
2781
|
tw->last_term = FRT_EMPTY_STRING;
|
2349
2782
|
return tw;
|
2350
2783
|
}
|
2351
2784
|
|
2352
|
-
static void tw_close(FrtTermWriter *tw)
|
2353
|
-
{
|
2785
|
+
static void tw_close(FrtTermWriter *tw) {
|
2354
2786
|
frt_os_close(tw->os);
|
2355
2787
|
free(tw);
|
2356
2788
|
}
|
2357
2789
|
|
2358
|
-
FrtTermInfosWriter *frt_tiw_open(FrtStore *store,
|
2359
|
-
const char *segment,
|
2360
|
-
int index_interval,
|
2361
|
-
int skip_interval)
|
2362
|
-
{
|
2790
|
+
FrtTermInfosWriter *frt_tiw_open(FrtStore *store, const char *segment, int index_interval, int skip_interval) {
|
2363
2791
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
2364
2792
|
FrtTermInfosWriter *tiw = FRT_ALLOC(FrtTermInfosWriter);
|
2365
2793
|
size_t segment_len = strlen(segment);
|
@@ -2388,11 +2816,7 @@ FrtTermInfosWriter *frt_tiw_open(FrtStore *store,
|
|
2388
2816
|
return tiw;
|
2389
2817
|
}
|
2390
2818
|
|
2391
|
-
static void tw_write_term(FrtTermWriter *tw,
|
2392
|
-
FrtOutStream *os,
|
2393
|
-
const char *term,
|
2394
|
-
int term_len)
|
2395
|
-
{
|
2819
|
+
static void tw_write_term(FrtTermWriter *tw, FrtOutStream *os, const char *term, int term_len) {
|
2396
2820
|
int start = frt_hlp_string_diff(tw->last_term, term);
|
2397
2821
|
int length = term_len - start;
|
2398
2822
|
|
@@ -2403,12 +2827,7 @@ static void tw_write_term(FrtTermWriter *tw,
|
|
2403
2827
|
tw->last_term = term;
|
2404
2828
|
}
|
2405
2829
|
|
2406
|
-
static void tw_add(FrtTermWriter *tw,
|
2407
|
-
const char *term,
|
2408
|
-
int term_len,
|
2409
|
-
FrtTermInfo *ti,
|
2410
|
-
int skip_interval)
|
2411
|
-
{
|
2830
|
+
static void tw_add(FrtTermWriter *tw, const char *term, int term_len, FrtTermInfo *ti, int skip_interval) {
|
2412
2831
|
FrtOutStream *os = tw->os;
|
2413
2832
|
|
2414
2833
|
#ifdef DEBUG
|
@@ -2438,11 +2857,7 @@ static void tw_add(FrtTermWriter *tw,
|
|
2438
2857
|
tw->counter++;
|
2439
2858
|
}
|
2440
2859
|
|
2441
|
-
void frt_tiw_add(FrtTermInfosWriter *tiw,
|
2442
|
-
const char *term,
|
2443
|
-
int term_len,
|
2444
|
-
FrtTermInfo *ti)
|
2445
|
-
{
|
2860
|
+
void frt_tiw_add(FrtTermInfosWriter *tiw, const char *term, int term_len, FrtTermInfo *ti) {
|
2446
2861
|
off_t tis_pos;
|
2447
2862
|
|
2448
2863
|
if (0 == (tiw->tis_writer->counter % tiw->index_interval)) {
|
@@ -2460,15 +2875,13 @@ void frt_tiw_add(FrtTermInfosWriter *tiw,
|
|
2460
2875
|
tw_add(tiw->tis_writer, term, term_len, ti, tiw->skip_interval);
|
2461
2876
|
}
|
2462
2877
|
|
2463
|
-
static void tw_reset(FrtTermWriter *tw)
|
2464
|
-
{
|
2878
|
+
static void tw_reset(FrtTermWriter *tw) {
|
2465
2879
|
tw->counter = 0;
|
2466
2880
|
tw->last_term = FRT_EMPTY_STRING;
|
2467
2881
|
FRT_ZEROSET(&(tw->last_term_info), FrtTermInfo);
|
2468
2882
|
}
|
2469
2883
|
|
2470
|
-
void frt_tiw_start_field(FrtTermInfosWriter *tiw, int field_num)
|
2471
|
-
{
|
2884
|
+
void frt_tiw_start_field(FrtTermInfosWriter *tiw, int field_num) {
|
2472
2885
|
FrtOutStream *tfx_out = tiw->tfx_out;
|
2473
2886
|
frt_os_write_vint(tfx_out, tiw->tix_writer->counter); /* write tix size */
|
2474
2887
|
frt_os_write_vint(tfx_out, tiw->tis_writer->counter); /* write tis size */
|
@@ -2481,8 +2894,7 @@ void frt_tiw_start_field(FrtTermInfosWriter *tiw, int field_num)
|
|
2481
2894
|
tiw->field_count++;
|
2482
2895
|
}
|
2483
2896
|
|
2484
|
-
void frt_tiw_close(FrtTermInfosWriter *tiw)
|
2485
|
-
{
|
2897
|
+
void frt_tiw_close(FrtTermInfosWriter *tiw) {
|
2486
2898
|
FrtOutStream *tfx_out = tiw->tfx_out;
|
2487
2899
|
frt_os_write_vint(tfx_out, tiw->tix_writer->counter);
|
2488
2900
|
frt_os_write_vint(tfx_out, tiw->tis_writer->counter);
|
@@ -2516,8 +2928,7 @@ void frt_tiw_close(FrtTermInfosWriter *tiw)
|
|
2516
2928
|
}\
|
2517
2929
|
} while (0)
|
2518
2930
|
|
2519
|
-
static void stde_seek_ti(FrtSegmentTermDocEnum *stde, FrtTermInfo *ti)
|
2520
|
-
{
|
2931
|
+
static void stde_seek_ti(FrtSegmentTermDocEnum *stde, FrtTermInfo *ti) {
|
2521
2932
|
if (NULL == ti) {
|
2522
2933
|
stde->doc_freq = 0;
|
2523
2934
|
} else {
|
@@ -2535,14 +2946,12 @@ static void stde_seek_ti(FrtSegmentTermDocEnum *stde, FrtTermInfo *ti)
|
|
2535
2946
|
}
|
2536
2947
|
}
|
2537
2948
|
|
2538
|
-
static void stde_seek(FrtTermDocEnum *tde, int field_num, const char *term)
|
2539
|
-
{
|
2949
|
+
static void stde_seek(FrtTermDocEnum *tde, int field_num, const char *term) {
|
2540
2950
|
FrtTermInfo *ti = tir_get_ti_field(STDE(tde)->tir, field_num, term);
|
2541
2951
|
stde_seek_ti(STDE(tde), ti);
|
2542
2952
|
}
|
2543
2953
|
|
2544
|
-
static void stde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te)
|
2545
|
-
{
|
2954
|
+
static void stde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te) {
|
2546
2955
|
#ifdef DEBUG
|
2547
2956
|
if (te->set_field != &ste_set_field) {
|
2548
2957
|
FRT_RAISE(FRT_ARG_ERROR, "Passed an incorrect TermEnum type");
|
@@ -2551,20 +2960,17 @@ static void stde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te)
|
|
2551
2960
|
stde_seek_ti(STDE(tde), &(te->curr_ti));
|
2552
2961
|
}
|
2553
2962
|
|
2554
|
-
static int stde_doc_num(FrtTermDocEnum *tde)
|
2555
|
-
{
|
2963
|
+
static int stde_doc_num(FrtTermDocEnum *tde) {
|
2556
2964
|
CHECK_STATE("doc_num");
|
2557
2965
|
return STDE(tde)->doc_num;
|
2558
2966
|
}
|
2559
2967
|
|
2560
|
-
static int stde_freq(FrtTermDocEnum *tde)
|
2561
|
-
{
|
2968
|
+
static int stde_freq(FrtTermDocEnum *tde) {
|
2562
2969
|
CHECK_STATE("freq");
|
2563
2970
|
return STDE(tde)->freq;
|
2564
2971
|
}
|
2565
2972
|
|
2566
|
-
static bool stde_next(FrtTermDocEnum *tde)
|
2567
|
-
{
|
2973
|
+
static bool stde_next(FrtTermDocEnum *tde) {
|
2568
2974
|
int doc_code;
|
2569
2975
|
FrtSegmentTermDocEnum *stde = STDE(tde);
|
2570
2976
|
|
@@ -2592,8 +2998,7 @@ static bool stde_next(FrtTermDocEnum *tde)
|
|
2592
2998
|
return true;
|
2593
2999
|
}
|
2594
3000
|
|
2595
|
-
static int stde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
|
2596
|
-
{
|
3001
|
+
static int stde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num) {
|
2597
3002
|
FrtSegmentTermDocEnum *stde = STDE(tde);
|
2598
3003
|
int i = 0;
|
2599
3004
|
int doc_code;
|
@@ -2620,8 +3025,7 @@ static int stde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
|
|
2620
3025
|
return i;
|
2621
3026
|
}
|
2622
3027
|
|
2623
|
-
static bool stde_skip_to(FrtTermDocEnum *tde, int target_doc_num)
|
2624
|
-
{
|
3028
|
+
static bool stde_skip_to(FrtTermDocEnum *tde, int target_doc_num) {
|
2625
3029
|
FrtSegmentTermDocEnum *stde = STDE(tde);
|
2626
3030
|
|
2627
3031
|
if (stde->doc_freq >= stde->skip_interval
|
@@ -2685,8 +3089,7 @@ static bool stde_skip_to(FrtTermDocEnum *tde, int target_doc_num)
|
|
2685
3089
|
return true;
|
2686
3090
|
}
|
2687
3091
|
|
2688
|
-
static void stde_close(FrtTermDocEnum *tde)
|
2689
|
-
{
|
3092
|
+
static void stde_close(FrtTermDocEnum *tde) {
|
2690
3093
|
frt_is_close(STDE(tde)->frq_in);
|
2691
3094
|
|
2692
3095
|
if (NULL != STDE(tde)->skip_in) {
|
@@ -2696,23 +3099,17 @@ static void stde_close(FrtTermDocEnum *tde)
|
|
2696
3099
|
free(tde);
|
2697
3100
|
}
|
2698
3101
|
|
2699
|
-
static void stde_skip_prox(FrtSegmentTermDocEnum *stde)
|
2700
|
-
{
|
3102
|
+
static void stde_skip_prox(FrtSegmentTermDocEnum *stde) {
|
2701
3103
|
(void)stde;
|
2702
3104
|
}
|
2703
3105
|
|
2704
|
-
static void stde_seek_prox(FrtSegmentTermDocEnum *stde, off_t prx_ptr)
|
2705
|
-
{
|
3106
|
+
static void stde_seek_prox(FrtSegmentTermDocEnum *stde, off_t prx_ptr) {
|
2706
3107
|
(void)stde;
|
2707
3108
|
(void)prx_ptr;
|
2708
3109
|
}
|
2709
3110
|
|
2710
3111
|
|
2711
|
-
FrtTermDocEnum *frt_stde_new(FrtTermInfosReader *tir,
|
2712
|
-
FrtInStream *frq_in,
|
2713
|
-
FrtBitVector *deleted_docs,
|
2714
|
-
int skip_interval)
|
2715
|
-
{
|
3112
|
+
FrtTermDocEnum *frt_stde_new(FrtTermInfosReader *tir, FrtInStream *frq_in, FrtBitVector *deleted_docs, int skip_interval) {
|
2716
3113
|
FrtSegmentTermDocEnum *stde = FRT_ALLOC_AND_ZERO(FrtSegmentTermDocEnum);
|
2717
3114
|
FrtTermDocEnum *tde = (FrtTermDocEnum *)stde;
|
2718
3115
|
|
@@ -2744,27 +3141,23 @@ FrtTermDocEnum *frt_stde_new(FrtTermInfosReader *tir,
|
|
2744
3141
|
* SegmentTermPosEnum
|
2745
3142
|
****************************************************************************/
|
2746
3143
|
|
2747
|
-
static void stpe_seek_ti(FrtSegmentTermDocEnum *stde, FrtTermInfo *ti)
|
2748
|
-
{
|
3144
|
+
static void stpe_seek_ti(FrtSegmentTermDocEnum *stde, FrtTermInfo *ti) {
|
2749
3145
|
if (NULL == ti) {
|
2750
3146
|
stde->doc_freq = 0;
|
2751
|
-
}
|
2752
|
-
else {
|
3147
|
+
} else {
|
2753
3148
|
stde_seek_ti(stde, ti);
|
2754
3149
|
frt_is_seek(stde->prx_in, ti->prx_ptr);
|
2755
3150
|
}
|
2756
3151
|
}
|
2757
3152
|
|
2758
|
-
static void stpe_seek(FrtTermDocEnum *tde, int field_num, const char *term)
|
2759
|
-
{
|
3153
|
+
static void stpe_seek(FrtTermDocEnum *tde, int field_num, const char *term) {
|
2760
3154
|
FrtSegmentTermDocEnum *stde = STDE(tde);
|
2761
3155
|
FrtTermInfo *ti = tir_get_ti_field(stde->tir, field_num, term);
|
2762
3156
|
stpe_seek_ti(stde, ti);
|
2763
3157
|
stde->prx_cnt = 0;
|
2764
3158
|
}
|
2765
3159
|
|
2766
|
-
static bool stpe_next(FrtTermDocEnum *tde)
|
2767
|
-
{
|
3160
|
+
static bool stpe_next(FrtTermDocEnum *tde) {
|
2768
3161
|
FrtSegmentTermDocEnum *stde = STDE(tde);
|
2769
3162
|
frt_is_skip_vints(stde->prx_in, stde->prx_cnt);
|
2770
3163
|
|
@@ -3238,8 +3631,8 @@ FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms, i
|
|
3238
3631
|
****************************************************************************/
|
3239
3632
|
|
3240
3633
|
static FrtHash *fn_extensions = NULL;
|
3241
|
-
|
3242
|
-
{
|
3634
|
+
|
3635
|
+
static void file_name_filter_init(void) {
|
3243
3636
|
int i;
|
3244
3637
|
fn_extensions = frt_h_new_str((frt_free_ft)NULL, (frt_free_ft)NULL);
|
3245
3638
|
for (i = 0; i < FRT_NELEMS(INDEX_EXTENSIONS); i++) {
|
@@ -3538,9 +3931,8 @@ static void ir_acquire_write_lock(FrtIndexReader *ir)
|
|
3538
3931
|
}
|
3539
3932
|
}
|
3540
3933
|
|
3541
|
-
static FrtIndexReader *ir_setup(FrtIndexReader *ir, FrtStore *store, FrtSegmentInfos *sis,
|
3542
|
-
|
3543
|
-
{
|
3934
|
+
static FrtIndexReader *ir_setup(FrtIndexReader *ir, FrtStore *store, FrtSegmentInfos *sis, FrtFieldInfos *fis, int is_owner) {
|
3935
|
+
ir->type = FRT_INDEX_READER;
|
3544
3936
|
frt_mutex_init(&ir->mutex, NULL);
|
3545
3937
|
frt_mutex_init(&ir->field_index_mutex, NULL);
|
3546
3938
|
|
@@ -3563,8 +3955,7 @@ static FrtIndexReader *ir_setup(FrtIndexReader *ir, FrtStore *store, FrtSegmentI
|
|
3563
3955
|
return ir;
|
3564
3956
|
}
|
3565
3957
|
|
3566
|
-
int frt_ir_doc_freq(FrtIndexReader *ir,
|
3567
|
-
{
|
3958
|
+
int frt_ir_doc_freq(FrtIndexReader *ir, ID field, const char *term) {
|
3568
3959
|
int field_num = frt_fis_get_field_num(ir->fis, field);
|
3569
3960
|
if (field_num >= 0) {
|
3570
3961
|
return ir->doc_freq(ir, field_num, term);
|
@@ -3574,8 +3965,7 @@ int frt_ir_doc_freq(FrtIndexReader *ir, FrtSymbol field, const char *term)
|
|
3574
3965
|
}
|
3575
3966
|
}
|
3576
3967
|
|
3577
|
-
static void ir_set_norm_i(FrtIndexReader *ir, int doc_num, int field_num, frt_uchar val)
|
3578
|
-
{
|
3968
|
+
static void ir_set_norm_i(FrtIndexReader *ir, int doc_num, int field_num, frt_uchar val) {
|
3579
3969
|
frt_mutex_lock(&ir->mutex);
|
3580
3970
|
ir->acquire_write_lock(ir);
|
3581
3971
|
ir->set_norm_i(ir, doc_num, field_num, val);
|
@@ -3583,8 +3973,7 @@ static void ir_set_norm_i(FrtIndexReader *ir, int doc_num, int field_num, frt_uc
|
|
3583
3973
|
frt_mutex_unlock(&ir->mutex);
|
3584
3974
|
}
|
3585
3975
|
|
3586
|
-
void frt_ir_set_norm(FrtIndexReader *ir, int doc_num,
|
3587
|
-
{
|
3976
|
+
void frt_ir_set_norm(FrtIndexReader *ir, int doc_num, ID field, frt_uchar val) {
|
3588
3977
|
int field_num = frt_fis_get_field_num(ir->fis, field);
|
3589
3978
|
if (field_num >= 0) {
|
3590
3979
|
ir_set_norm_i(ir, doc_num, field_num, val);
|
@@ -3606,14 +3995,12 @@ frt_uchar *frt_ir_get_norms_i(FrtIndexReader *ir, int field_num)
|
|
3606
3995
|
return norms;
|
3607
3996
|
}
|
3608
3997
|
|
3609
|
-
frt_uchar *frt_ir_get_norms(FrtIndexReader *ir,
|
3610
|
-
{
|
3998
|
+
frt_uchar *frt_ir_get_norms(FrtIndexReader *ir, ID field) {
|
3611
3999
|
int field_num = frt_fis_get_field_num(ir->fis, field);
|
3612
4000
|
return frt_ir_get_norms_i(ir, field_num);
|
3613
4001
|
}
|
3614
4002
|
|
3615
|
-
frt_uchar *frt_ir_get_norms_into(FrtIndexReader *ir,
|
3616
|
-
{
|
4003
|
+
frt_uchar *frt_ir_get_norms_into(FrtIndexReader *ir, ID field, frt_uchar *buf) {
|
3617
4004
|
int field_num = frt_fis_get_field_num(ir->fis, field);
|
3618
4005
|
if (field_num >= 0) {
|
3619
4006
|
ir->get_norms_into(ir, field_num, buf);
|
@@ -3644,7 +4031,7 @@ void frt_ir_delete_doc(FrtIndexReader *ir, int doc_num)
|
|
3644
4031
|
}
|
3645
4032
|
}
|
3646
4033
|
|
3647
|
-
FrtDocument *frt_ir_get_doc_with_term(FrtIndexReader *ir,
|
4034
|
+
FrtDocument *frt_ir_get_doc_with_term(FrtIndexReader *ir, ID field, const char *term) {
|
3648
4035
|
FrtTermDocEnum *tde = ir_term_docs_for(ir, field, term);
|
3649
4036
|
FrtDocument *doc = NULL;
|
3650
4037
|
|
@@ -3657,8 +4044,7 @@ FrtDocument *frt_ir_get_doc_with_term(FrtIndexReader *ir, FrtSymbol field, const
|
|
3657
4044
|
return doc;
|
3658
4045
|
}
|
3659
4046
|
|
3660
|
-
FrtTermEnum *frt_ir_terms(FrtIndexReader *ir,
|
3661
|
-
{
|
4047
|
+
FrtTermEnum *frt_ir_terms(FrtIndexReader *ir, ID field) {
|
3662
4048
|
FrtTermEnum *te = NULL;
|
3663
4049
|
int field_num = frt_fis_get_field_num(ir->fis, field);
|
3664
4050
|
if (field_num >= 0) {
|
@@ -3667,9 +4053,7 @@ FrtTermEnum *frt_ir_terms(FrtIndexReader *ir, FrtSymbol field)
|
|
3667
4053
|
return te;
|
3668
4054
|
}
|
3669
4055
|
|
3670
|
-
FrtTermEnum *frt_ir_terms_from(FrtIndexReader *ir,
|
3671
|
-
const char *term)
|
3672
|
-
{
|
4056
|
+
FrtTermEnum *frt_ir_terms_from(FrtIndexReader *ir, ID field, const char *term) {
|
3673
4057
|
FrtTermEnum *te = NULL;
|
3674
4058
|
int field_num = frt_fis_get_field_num(ir->fis, field);
|
3675
4059
|
if (field_num >= 0) {
|
@@ -3678,9 +4062,7 @@ FrtTermEnum *frt_ir_terms_from(FrtIndexReader *ir, FrtSymbol field,
|
|
3678
4062
|
return te;
|
3679
4063
|
}
|
3680
4064
|
|
3681
|
-
FrtTermDocEnum *ir_term_docs_for(FrtIndexReader *ir,
|
3682
|
-
const char *term)
|
3683
|
-
{
|
4065
|
+
FrtTermDocEnum *ir_term_docs_for(FrtIndexReader *ir, ID field, const char *term) {
|
3684
4066
|
int field_num = frt_fis_get_field_num(ir->fis, field);
|
3685
4067
|
FrtTermDocEnum *tde = ir->term_docs(ir);
|
3686
4068
|
if (field_num >= 0) {
|
@@ -3689,9 +4071,7 @@ FrtTermDocEnum *ir_term_docs_for(FrtIndexReader *ir, FrtSymbol field,
|
|
3689
4071
|
return tde;
|
3690
4072
|
}
|
3691
4073
|
|
3692
|
-
FrtTermDocEnum *frt_ir_term_positions_for(FrtIndexReader *ir,
|
3693
|
-
const char *term)
|
3694
|
-
{
|
4074
|
+
FrtTermDocEnum *frt_ir_term_positions_for(FrtIndexReader *ir, ID field, const char *term) {
|
3695
4075
|
int field_num = frt_fis_get_field_num(ir->fis, field);
|
3696
4076
|
FrtTermDocEnum *tde = ir->term_positions(ir);
|
3697
4077
|
if (field_num >= 0) {
|
@@ -3705,7 +4085,7 @@ static void ir_commit_i(FrtIndexReader *ir)
|
|
3705
4085
|
if (ir->has_changes) {
|
3706
4086
|
if (NULL == ir->deleter && NULL != ir->store) {
|
3707
4087
|
/* In the MultiReader case, we share this deleter across all
|
3708
|
-
*
|
4088
|
+
* FrtSegmentReaders: */
|
3709
4089
|
ir->set_deleter_i(ir, frt_deleter_new(ir->sis, ir->store));
|
3710
4090
|
}
|
3711
4091
|
if (ir->is_owner) {
|
@@ -3841,34 +4221,14 @@ static void norm_rewrite(Norm *norm, FrtStore *store, FrtDeleter *dlr,
|
|
3841
4221
|
}
|
3842
4222
|
|
3843
4223
|
/****************************************************************************
|
3844
|
-
*
|
4224
|
+
* FrtSegmentReader
|
3845
4225
|
****************************************************************************/
|
3846
4226
|
|
3847
|
-
typedef struct SegmentReader {
|
3848
|
-
FrtIndexReader ir;
|
3849
|
-
FrtSegmentInfo *si;
|
3850
|
-
char *segment;
|
3851
|
-
FrtFieldsReader *fr;
|
3852
|
-
FrtBitVector *deleted_docs;
|
3853
|
-
FrtInStream *frq_in;
|
3854
|
-
FrtInStream *prx_in;
|
3855
|
-
FrtSegmentFieldIndex *sfi;
|
3856
|
-
FrtTermInfosReader *tir;
|
3857
|
-
frt_thread_key_t thread_fr;
|
3858
|
-
void **fr_bucket;
|
3859
|
-
FrtHash *norms;
|
3860
|
-
FrtStore *cfs_store;
|
3861
|
-
bool deleted_docs_dirty : 1;
|
3862
|
-
bool undelete_all : 1;
|
3863
|
-
bool norms_dirty : 1;
|
3864
|
-
} SegmentReader;
|
3865
|
-
|
3866
4227
|
#define IR(ir) ((FrtIndexReader *)(ir))
|
3867
|
-
|
3868
|
-
#define SR(ir) ((SegmentReader *)(ir))
|
4228
|
+
#define SR(ir) ((FrtSegmentReader *)(ir))
|
3869
4229
|
#define SR_SIZE(ir) (SR(ir)->fr->size)
|
3870
4230
|
|
3871
|
-
static FrtFieldsReader *sr_fr(
|
4231
|
+
static FrtFieldsReader *sr_fr(FrtSegmentReader *sr)
|
3872
4232
|
{
|
3873
4233
|
FrtFieldsReader *fr;
|
3874
4234
|
|
@@ -3880,12 +4240,12 @@ static FrtFieldsReader *sr_fr(SegmentReader *sr)
|
|
3880
4240
|
return fr;
|
3881
4241
|
}
|
3882
4242
|
|
3883
|
-
static bool sr_is_deleted_i(
|
4243
|
+
static bool sr_is_deleted_i(FrtSegmentReader *sr, int doc_num)
|
3884
4244
|
{
|
3885
4245
|
return (NULL != sr->deleted_docs && frt_bv_get(sr->deleted_docs, doc_num));
|
3886
4246
|
}
|
3887
4247
|
|
3888
|
-
static void sr_get_norms_into_i(
|
4248
|
+
static void sr_get_norms_into_i(FrtSegmentReader *sr, int field_num,
|
3889
4249
|
frt_uchar *buf)
|
3890
4250
|
{
|
3891
4251
|
Norm *norm = (Norm *)frt_h_get_int(sr->norms, field_num);
|
@@ -3904,7 +4264,7 @@ static void sr_get_norms_into_i(SegmentReader *sr, int field_num,
|
|
3904
4264
|
}
|
3905
4265
|
}
|
3906
4266
|
|
3907
|
-
static frt_uchar *sr_get_norms_i(
|
4267
|
+
static frt_uchar *sr_get_norms_i(FrtSegmentReader *sr, int field_num)
|
3908
4268
|
{
|
3909
4269
|
Norm *norm = (Norm *)frt_h_get_int(sr->norms, field_num);
|
3910
4270
|
if (NULL == norm) { /* not an indexed field */
|
@@ -4040,7 +4400,7 @@ static void sr_commit_i(FrtIndexReader *ir)
|
|
4040
4400
|
|
4041
4401
|
static void sr_close_i(FrtIndexReader *ir)
|
4042
4402
|
{
|
4043
|
-
|
4403
|
+
FrtSegmentReader *sr = SR(ir);
|
4044
4404
|
|
4045
4405
|
if (sr->fr) frt_fr_close(sr->fr);
|
4046
4406
|
if (sr->tir) frt_tir_close(sr->tir);
|
@@ -4149,14 +4509,12 @@ static FrtTermDocEnum *sr_term_docs(FrtIndexReader *ir)
|
|
4149
4509
|
|
4150
4510
|
static FrtTermDocEnum *sr_term_positions(FrtIndexReader *ir)
|
4151
4511
|
{
|
4152
|
-
|
4512
|
+
FrtSegmentReader *sr = SR(ir);
|
4153
4513
|
return frt_stpe_new(sr->tir, sr->frq_in, sr->prx_in, sr->deleted_docs,
|
4154
4514
|
STE(sr->tir->orig_te)->skip_interval);
|
4155
4515
|
}
|
4156
4516
|
|
4157
|
-
static FrtTermVector *sr_term_vector(FrtIndexReader *ir, int doc_num,
|
4158
|
-
FrtSymbol field)
|
4159
|
-
{
|
4517
|
+
static FrtTermVector *sr_term_vector(FrtIndexReader *ir, int doc_num, ID field) {
|
4160
4518
|
FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(ir->fis->field_dict, (void *)field);
|
4161
4519
|
FrtFieldsReader *fr;
|
4162
4520
|
|
@@ -4211,7 +4569,7 @@ static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store)
|
|
4211
4569
|
SR(ir)->norms_dirty = false;
|
4212
4570
|
}
|
4213
4571
|
|
4214
|
-
static FrtIndexReader *sr_setup_i(
|
4572
|
+
static FrtIndexReader *sr_setup_i(FrtSegmentReader *sr)
|
4215
4573
|
{
|
4216
4574
|
FrtStore *volatile store = sr->si->store;
|
4217
4575
|
FrtIndexReader *ir = IR(sr);
|
@@ -4242,6 +4600,8 @@ static FrtIndexReader *sr_setup_i(SegmentReader *sr)
|
|
4242
4600
|
ir->commit_i = &sr_commit_i;
|
4243
4601
|
ir->close_i = &sr_close_i;
|
4244
4602
|
|
4603
|
+
ir->type = FRT_SEGMENT_READER;
|
4604
|
+
|
4245
4605
|
sr->cfs_store = NULL;
|
4246
4606
|
|
4247
4607
|
FRT_TRY
|
@@ -4281,10 +4641,13 @@ static FrtIndexReader *sr_setup_i(SegmentReader *sr)
|
|
4281
4641
|
return ir;
|
4282
4642
|
}
|
4283
4643
|
|
4284
|
-
|
4285
|
-
|
4286
|
-
|
4287
|
-
|
4644
|
+
FrtSegmentReader *frt_sr_alloc(void) {
|
4645
|
+
return FRT_ALLOC_AND_ZERO(FrtSegmentReader);
|
4646
|
+
}
|
4647
|
+
|
4648
|
+
static FrtIndexReader *sr_open(FrtSegmentInfos *sis, FrtFieldInfos *fis, int si_num, bool is_owner, FrtSegmentReader *sr) {
|
4649
|
+
if (sr == NULL)
|
4650
|
+
sr = frt_sr_alloc();
|
4288
4651
|
sr->si = sis->segs[si_num];
|
4289
4652
|
ir_setup(IR(sr), sr->si->store, sis, fis, is_owner);
|
4290
4653
|
return sr_setup_i(sr);
|
@@ -4455,9 +4818,7 @@ static FrtTermDocEnum *mr_term_positions(FrtIndexReader *ir)
|
|
4455
4818
|
return mtpe_new(MR(ir));
|
4456
4819
|
}
|
4457
4820
|
|
4458
|
-
static FrtTermVector *mr_term_vector(FrtIndexReader *ir, int doc_num,
|
4459
|
-
FrtSymbol field)
|
4460
|
-
{
|
4821
|
+
static FrtTermVector *mr_term_vector(FrtIndexReader *ir, int doc_num, ID field) {
|
4461
4822
|
GET_READER();
|
4462
4823
|
return reader->term_vector(reader, doc_num - MR(ir)->starts[i], field);
|
4463
4824
|
}
|
@@ -4561,10 +4922,12 @@ static void mr_close_i(FrtIndexReader *ir)
|
|
4561
4922
|
free(MR(ir)->starts);
|
4562
4923
|
}
|
4563
4924
|
|
4564
|
-
|
4565
|
-
|
4925
|
+
FrtMultiReader *frt_mr_alloc(void) {
|
4926
|
+
return FRT_ALLOC_AND_ZERO(FrtMultiReader);
|
4927
|
+
}
|
4928
|
+
|
4929
|
+
FrtMultiReader *frt_mr_init(FrtMultiReader *mr, FrtIndexReader **sub_readers, const int r_cnt) {
|
4566
4930
|
int i;
|
4567
|
-
FrtMultiReader *mr = FRT_ALLOC_AND_ZERO(FrtMultiReader);
|
4568
4931
|
FrtIndexReader *ir = IR(mr);
|
4569
4932
|
|
4570
4933
|
mr->sub_readers = sub_readers;
|
@@ -4611,21 +4974,19 @@ static FrtIndexReader *mr_new(FrtIndexReader **sub_readers, const int r_cnt)
|
|
4611
4974
|
ir->commit_i = &mr_commit_i;
|
4612
4975
|
ir->close_i = &mr_close_i;
|
4613
4976
|
|
4614
|
-
|
4977
|
+
ir->type = FRT_MULTI_READER;
|
4978
|
+
|
4979
|
+
return mr;
|
4615
4980
|
}
|
4616
4981
|
|
4617
|
-
static FrtIndexReader *frt_mr_open_i(FrtStore *store,
|
4618
|
-
|
4619
|
-
|
4620
|
-
|
4621
|
-
const int r_cnt)
|
4622
|
-
{
|
4623
|
-
FrtIndexReader *ir = mr_new(sub_readers, r_cnt);
|
4982
|
+
static FrtIndexReader *frt_mr_open_i(FrtStore *store, FrtSegmentInfos *sis, FrtFieldInfos *fis, FrtIndexReader **sub_readers, const int r_cnt, FrtIndexReader *ir) {
|
4983
|
+
if (ir == NULL)
|
4984
|
+
ir = (FrtIndexReader *)frt_mr_alloc();
|
4985
|
+
ir = (FrtIndexReader *)frt_mr_init((FrtMultiReader *)ir, sub_readers, r_cnt);
|
4624
4986
|
return ir_setup(ir, store, sis, fis, true);
|
4625
4987
|
}
|
4626
4988
|
|
4627
|
-
static void mr_close_ext_i(FrtIndexReader *ir)
|
4628
|
-
{
|
4989
|
+
static void mr_close_ext_i(FrtIndexReader *ir) {
|
4629
4990
|
int **field_num_map = MR(ir)->field_num_map;
|
4630
4991
|
if (field_num_map) {
|
4631
4992
|
int i;
|
@@ -4638,12 +4999,13 @@ static void mr_close_ext_i(FrtIndexReader *ir)
|
|
4638
4999
|
mr_close_i(ir);
|
4639
5000
|
}
|
4640
5001
|
|
4641
|
-
FrtIndexReader *frt_mr_open(FrtIndexReader **sub_readers, const int r_cnt)
|
4642
|
-
|
4643
|
-
|
5002
|
+
FrtIndexReader *frt_mr_open(FrtIndexReader *ir, FrtIndexReader **sub_readers, const int r_cnt) {
|
5003
|
+
if (ir == NULL)
|
5004
|
+
ir = (FrtIndexReader *)frt_mr_alloc();
|
5005
|
+
ir = (FrtIndexReader *)frt_mr_init((FrtMultiReader *)ir, sub_readers, r_cnt);
|
4644
5006
|
FrtMultiReader *mr = MR(ir);
|
4645
5007
|
/* defaults don't matter, this is just for reading fields, not adding */
|
4646
|
-
FrtFieldInfos *fis = frt_fis_new(FRT_STORE_NO, FRT_INDEX_NO, FRT_TERM_VECTOR_NO);
|
5008
|
+
FrtFieldInfos *fis = frt_fis_new(FRT_STORE_NO, FRT_COMPRESSION_NONE, FRT_INDEX_NO, FRT_TERM_VECTOR_NO);
|
4647
5009
|
int i, j;
|
4648
5010
|
bool need_field_map = false;
|
4649
5011
|
|
@@ -4678,12 +5040,10 @@ FrtIndexReader *frt_mr_open(FrtIndexReader **sub_readers, const int r_cnt)
|
|
4678
5040
|
mr->field_num_map[i][j] = fi_sub ? fi_sub->number : -1;
|
4679
5041
|
}
|
4680
5042
|
}
|
4681
|
-
}
|
4682
|
-
else {
|
5043
|
+
} else {
|
4683
5044
|
mr->field_num_map = NULL;
|
4684
5045
|
}
|
4685
5046
|
|
4686
|
-
|
4687
5047
|
ir->close_i = &mr_close_ext_i;
|
4688
5048
|
|
4689
5049
|
return ir_setup(ir, NULL, NULL, fis, false);
|
@@ -4693,21 +5053,19 @@ FrtIndexReader *frt_mr_open(FrtIndexReader **sub_readers, const int r_cnt)
|
|
4693
5053
|
* IndexReader
|
4694
5054
|
****************************************************************************/
|
4695
5055
|
|
4696
|
-
|
4697
|
-
static void ir_open_i(FrtStore *store, FindSegmentsFile *fsf)
|
4698
|
-
{
|
5056
|
+
static void ir_open_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir) {
|
4699
5057
|
volatile bool success = false;
|
4700
|
-
FrtIndexReader *volatile ir = NULL;
|
5058
|
+
// FrtIndexReader *volatile ir = NULL;
|
4701
5059
|
FrtSegmentInfos *volatile sis = NULL;
|
4702
5060
|
FRT_TRY
|
4703
5061
|
do {
|
4704
5062
|
FrtFieldInfos *fis;
|
4705
5063
|
frt_mutex_lock(&store->mutex);
|
4706
|
-
frt_sis_read_i(store, fsf);
|
5064
|
+
frt_sis_read_i(store, fsf, NULL);
|
4707
5065
|
sis = fsf->ret.sis;
|
4708
5066
|
fis = sis->fis;
|
4709
5067
|
if (sis->size == 1) {
|
4710
|
-
ir = sr_open(sis, fis, 0, true);
|
5068
|
+
ir = sr_open(sis, fis, 0, true, (FrtSegmentReader *)ir);
|
4711
5069
|
}
|
4712
5070
|
else {
|
4713
5071
|
volatile int i;
|
@@ -4715,7 +5073,7 @@ static void ir_open_i(FrtStore *store, FindSegmentsFile *fsf)
|
|
4715
5073
|
int num_segments = sis->size;
|
4716
5074
|
for (i = num_segments - 1; i >= 0; i--) {
|
4717
5075
|
FRT_TRY
|
4718
|
-
readers[i] = sr_open(sis, fis, i, false);
|
5076
|
+
readers[i] = sr_open(sis, fis, i, false, NULL);
|
4719
5077
|
FRT_XCATCHALL
|
4720
5078
|
for (i++; i < num_segments; i++) {
|
4721
5079
|
frt_ir_close(readers[i]);
|
@@ -4723,7 +5081,7 @@ static void ir_open_i(FrtStore *store, FindSegmentsFile *fsf)
|
|
4723
5081
|
free(readers);
|
4724
5082
|
FRT_XENDTRY
|
4725
5083
|
}
|
4726
|
-
ir = frt_mr_open_i(store, sis, fis, readers, sis->size);
|
5084
|
+
ir = frt_mr_open_i(store, sis, fis, readers, sis->size, ir);
|
4727
5085
|
}
|
4728
5086
|
fsf->ret.ir = ir;
|
4729
5087
|
success = true;
|
@@ -4732,8 +5090,7 @@ static void ir_open_i(FrtStore *store, FindSegmentsFile *fsf)
|
|
4732
5090
|
if (!success) {
|
4733
5091
|
if (ir) {
|
4734
5092
|
frt_ir_close(ir);
|
4735
|
-
}
|
4736
|
-
else if (sis) {
|
5093
|
+
} else if (sis) {
|
4737
5094
|
frt_sis_destroy(sis);
|
4738
5095
|
}
|
4739
5096
|
}
|
@@ -4745,15 +5102,12 @@ static void ir_open_i(FrtStore *store, FindSegmentsFile *fsf)
|
|
4745
5102
|
* Will keep a reference to the store. To let this method delete the store
|
4746
5103
|
* make sure you deref the store that you pass to it
|
4747
5104
|
*/
|
4748
|
-
FrtIndexReader *frt_ir_open(FrtStore *store)
|
4749
|
-
{
|
5105
|
+
FrtIndexReader *frt_ir_open(FrtIndexReader *ir, FrtStore *store) {
|
4750
5106
|
FindSegmentsFile fsf;
|
4751
|
-
sis_find_segments_file(store, &fsf, &ir_open_i);
|
5107
|
+
sis_find_segments_file(store, &fsf, &ir_open_i, ir);
|
4752
5108
|
return fsf.ret.ir;
|
4753
5109
|
}
|
4754
5110
|
|
4755
|
-
|
4756
|
-
|
4757
5111
|
/****************************************************************************
|
4758
5112
|
*
|
4759
5113
|
* Occurence
|
@@ -5143,10 +5497,7 @@ static void dw_add_offsets(FrtDocWriter *dw, int pos, off_t start, off_t end)
|
|
5143
5497
|
dw->offsets_size = pos + 1;
|
5144
5498
|
}
|
5145
5499
|
|
5146
|
-
FrtHash *frt_dw_invert_field(FrtDocWriter *dw,
|
5147
|
-
FrtFieldInverter *fld_inv,
|
5148
|
-
FrtDocField *df)
|
5149
|
-
{
|
5500
|
+
FrtHash *frt_dw_invert_field(FrtDocWriter *dw, FrtFieldInverter *fld_inv, FrtDocField *df) {
|
5150
5501
|
FrtMemoryPool *mp = dw->mp;
|
5151
5502
|
FrtAnalyzer *a = dw->analyzer;
|
5152
5503
|
FrtHash *curr_plists = dw->curr_plists;
|
@@ -5162,7 +5513,7 @@ FrtHash *frt_dw_invert_field(FrtDocWriter *dw,
|
|
5162
5513
|
int pos = -1, num_terms = 0;
|
5163
5514
|
|
5164
5515
|
for (i = 0; i < df_size; i++) {
|
5165
|
-
FrtTokenStream *ts = frt_a_get_ts(a, df->name, df->data[i]);
|
5516
|
+
FrtTokenStream *ts = frt_a_get_ts(a, df->name, df->data[i], df->encodings[i]);
|
5166
5517
|
/* ts->reset(ts, df->data[i]); no longer being called */
|
5167
5518
|
if (store_offsets) {
|
5168
5519
|
while (NULL != (tk = ts->next(ts))) {
|
@@ -5172,21 +5523,16 @@ FrtHash *frt_dw_invert_field(FrtDocWriter *dw,
|
|
5172
5523
|
if (pos < 0) {
|
5173
5524
|
pos = 0;
|
5174
5525
|
}
|
5175
|
-
dw_add_posting(mp, curr_plists, fld_plists, doc_num,
|
5176
|
-
|
5177
|
-
dw_add_offsets(dw, pos,
|
5178
|
-
start_offset + tk->start,
|
5179
|
-
start_offset + tk->end);
|
5526
|
+
dw_add_posting(mp, curr_plists, fld_plists, doc_num, tk->text, tk->len, pos);
|
5527
|
+
dw_add_offsets(dw, pos, start_offset + tk->start, start_offset + tk->end);
|
5180
5528
|
if (num_terms++ >= dw->max_field_length) {
|
5181
5529
|
break;
|
5182
5530
|
}
|
5183
5531
|
}
|
5184
|
-
}
|
5185
|
-
else {
|
5532
|
+
} else {
|
5186
5533
|
while (NULL != (tk = ts->next(ts))) {
|
5187
5534
|
pos += tk->pos_inc;
|
5188
|
-
dw_add_posting(mp, curr_plists, fld_plists, doc_num,
|
5189
|
-
tk->text, tk->len, pos);
|
5535
|
+
dw_add_posting(mp, curr_plists, fld_plists, doc_num, tk->text, tk->len, pos);
|
5190
5536
|
if (num_terms++ >= dw->max_field_length) {
|
5191
5537
|
break;
|
5192
5538
|
}
|
@@ -5196,8 +5542,7 @@ FrtHash *frt_dw_invert_field(FrtDocWriter *dw,
|
|
5196
5542
|
start_offset += df->lengths[i] + 1;
|
5197
5543
|
}
|
5198
5544
|
fld_inv->length = num_terms;
|
5199
|
-
}
|
5200
|
-
else {
|
5545
|
+
} else {
|
5201
5546
|
char buf[FRT_MAX_WORD_SIZE];
|
5202
5547
|
buf[FRT_MAX_WORD_SIZE - 1] = '\0';
|
5203
5548
|
for (i = 0; i < df_size; i++) {
|
@@ -5207,11 +5552,9 @@ FrtHash *frt_dw_invert_field(FrtDocWriter *dw,
|
|
5207
5552
|
len = FRT_MAX_WORD_SIZE - 1;
|
5208
5553
|
data_ptr = (char *)memcpy(buf, df->data[i], len);
|
5209
5554
|
}
|
5210
|
-
dw_add_posting(mp, curr_plists, fld_plists, doc_num, data_ptr,
|
5211
|
-
len, i);
|
5555
|
+
dw_add_posting(mp, curr_plists, fld_plists, doc_num, data_ptr, len, i);
|
5212
5556
|
if (store_offsets) {
|
5213
|
-
dw_add_offsets(dw, i, start_offset,
|
5214
|
-
start_offset + df->lengths[i]);
|
5557
|
+
dw_add_offsets(dw, i, start_offset, start_offset + df->lengths[i]);
|
5215
5558
|
}
|
5216
5559
|
start_offset += df->lengths[i] + 1;
|
5217
5560
|
}
|
@@ -5220,14 +5563,12 @@ FrtHash *frt_dw_invert_field(FrtDocWriter *dw,
|
|
5220
5563
|
return curr_plists;
|
5221
5564
|
}
|
5222
5565
|
|
5223
|
-
void frt_dw_reset_postings(FrtHash *postings)
|
5224
|
-
{
|
5566
|
+
void frt_dw_reset_postings(FrtHash *postings) {
|
5225
5567
|
FRT_ZEROSET_N(postings->table, FrtHashEntry, postings->mask + 1);
|
5226
5568
|
postings->fill = postings->size = 0;
|
5227
5569
|
}
|
5228
5570
|
|
5229
|
-
void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc)
|
5230
|
-
{
|
5571
|
+
void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc) {
|
5231
5572
|
int i;
|
5232
5573
|
float boost;
|
5233
5574
|
FrtDocField *df;
|
@@ -5249,16 +5590,12 @@ void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc)
|
|
5249
5590
|
|
5250
5591
|
postings = frt_dw_invert_field(dw, fld_inv, df);
|
5251
5592
|
if (fld_inv->store_term_vector) {
|
5252
|
-
frt_fw_add_postings(dw->fw, fld_inv->fi->number,
|
5253
|
-
dw_sort_postings(postings), postings->size,
|
5254
|
-
dw->offsets, dw->offsets_size);
|
5593
|
+
frt_fw_add_postings(dw->fw, fld_inv->fi->number, dw_sort_postings(postings), postings->size, dw->offsets, dw->offsets_size);
|
5255
5594
|
}
|
5256
5595
|
|
5257
5596
|
if (fld_inv->has_norms) {
|
5258
|
-
boost = fld_inv->fi->boost * doc->boost * df->boost *
|
5259
|
-
|
5260
|
-
fld_inv->norms[dw->doc_num] =
|
5261
|
-
frt_sim_encode_norm(dw->similarity, boost);
|
5597
|
+
boost = fld_inv->fi->boost * doc->boost * df->boost * frt_sim_length_norm(dw->similarity, fi->name, fld_inv->length);
|
5598
|
+
fld_inv->norms[dw->doc_num] = frt_sim_encode_norm(dw->similarity, boost);
|
5262
5599
|
}
|
5263
5600
|
frt_dw_reset_postings(postings);
|
5264
5601
|
if (dw->offsets_size > 0) {
|
@@ -5811,15 +6148,12 @@ static void iw_commit_compound_file(FrtIndexWriter *iw, FrtSegmentInfo *si)
|
|
5811
6148
|
iw_create_compound_file(iw->store, iw->fis, si, cfs_name, iw->deleter);
|
5812
6149
|
}
|
5813
6150
|
|
5814
|
-
static void iw_merge_segments(FrtIndexWriter *iw, const int min_seg,
|
5815
|
-
const int max_seg)
|
5816
|
-
{
|
6151
|
+
static void iw_merge_segments(FrtIndexWriter *iw, const int min_seg, const int max_seg) {
|
5817
6152
|
int i;
|
5818
6153
|
FrtSegmentInfos *sis = iw->sis;
|
5819
6154
|
FrtSegmentInfo *si = frt_sis_new_segment(sis, 0, iw->store);
|
5820
6155
|
|
5821
|
-
SegmentMerger *merger = sm_create(iw, si, &sis->segs[min_seg],
|
5822
|
-
max_seg - min_seg);
|
6156
|
+
SegmentMerger *merger = sm_create(iw, si, &sis->segs[min_seg], max_seg - min_seg);
|
5823
6157
|
|
5824
6158
|
/* This is where all the action happens. */
|
5825
6159
|
si->doc_cnt = sm_merge(merger);
|
@@ -5931,8 +6265,7 @@ void frt_iw_commit(FrtIndexWriter *iw)
|
|
5931
6265
|
frt_mutex_unlock(&iw->mutex);
|
5932
6266
|
}
|
5933
6267
|
|
5934
|
-
void frt_iw_delete_term(FrtIndexWriter *iw, FrtSymbol field, const char *term)
|
5935
|
-
{
|
6268
|
+
void frt_iw_delete_term(FrtIndexWriter *iw, ID field, const char *term) {
|
5936
6269
|
int field_num = frt_fis_get_field_num(iw->fis, field);
|
5937
6270
|
if (field_num >= 0) {
|
5938
6271
|
int i;
|
@@ -5943,7 +6276,7 @@ void frt_iw_delete_term(FrtIndexWriter *iw, FrtSymbol field, const char *term)
|
|
5943
6276
|
const int seg_cnt = sis->size;
|
5944
6277
|
bool did_delete = false;
|
5945
6278
|
for (i = 0; i < seg_cnt; i++) {
|
5946
|
-
FrtIndexReader *ir = sr_open(sis, iw->fis, i, false);
|
6279
|
+
FrtIndexReader *ir = sr_open(sis, iw->fis, i, false, NULL);
|
5947
6280
|
FrtTermDocEnum *tde = ir->term_docs(ir);
|
5948
6281
|
ir->deleter = iw->deleter;
|
5949
6282
|
stde_seek(tde, field_num, term);
|
@@ -5965,9 +6298,7 @@ void frt_iw_delete_term(FrtIndexWriter *iw, FrtSymbol field, const char *term)
|
|
5965
6298
|
}
|
5966
6299
|
}
|
5967
6300
|
|
5968
|
-
void frt_iw_delete_terms(FrtIndexWriter *iw, FrtSymbol field,
|
5969
|
-
char **terms, const int term_cnt)
|
5970
|
-
{
|
6301
|
+
void frt_iw_delete_terms(FrtIndexWriter *iw, ID field, char **terms, const int term_cnt) {
|
5971
6302
|
int field_num = frt_fis_get_field_num(iw->fis, field);
|
5972
6303
|
if (field_num >= 0) {
|
5973
6304
|
int i;
|
@@ -5978,7 +6309,7 @@ void frt_iw_delete_terms(FrtIndexWriter *iw, FrtSymbol field,
|
|
5978
6309
|
const int seg_cnt = sis->size;
|
5979
6310
|
bool did_delete = false;
|
5980
6311
|
for (i = 0; i < seg_cnt; i++) {
|
5981
|
-
FrtIndexReader *ir = sr_open(sis, iw->fis, i, false);
|
6312
|
+
FrtIndexReader *ir = sr_open(sis, iw->fis, i, false, NULL);
|
5982
6313
|
FrtTermDocEnum *tde = ir->term_docs(ir);
|
5983
6314
|
int j;
|
5984
6315
|
for (j = 0 ; j < term_cnt; j++) {
|
@@ -6047,10 +6378,13 @@ void frt_iw_close(FrtIndexWriter *iw)
|
|
6047
6378
|
free(iw);
|
6048
6379
|
}
|
6049
6380
|
|
6050
|
-
FrtIndexWriter *frt_iw_open(FrtStore *store, FrtAnalyzer *volatile analyzer,
|
6051
|
-
|
6052
|
-
|
6053
|
-
|
6381
|
+
FrtIndexWriter *frt_iw_alloc(void) {
|
6382
|
+
return FRT_ALLOC_AND_ZERO(FrtIndexWriter);
|
6383
|
+
}
|
6384
|
+
|
6385
|
+
FrtIndexWriter *frt_iw_open(FrtIndexWriter *iw, FrtStore *store, FrtAnalyzer *volatile analyzer, const FrtConfig *config) {
|
6386
|
+
if (iw == NULL)
|
6387
|
+
iw = frt_iw_alloc();
|
6054
6388
|
frt_mutex_init(&iw->mutex, NULL);
|
6055
6389
|
iw->store = store;
|
6056
6390
|
if (!config) {
|
@@ -6081,7 +6415,7 @@ FrtIndexWriter *frt_iw_open(FrtStore *store, FrtAnalyzer *volatile analyzer,
|
|
6081
6415
|
|
6082
6416
|
iw->similarity = frt_sim_create_default();
|
6083
6417
|
iw->analyzer = analyzer ? (FrtAnalyzer *)analyzer
|
6084
|
-
:
|
6418
|
+
: frt_standard_analyzer_new(true);
|
6085
6419
|
|
6086
6420
|
iw->deleter = frt_deleter_new(iw->sis, store);
|
6087
6421
|
deleter_delete_deletable_files(iw->deleter);
|
@@ -6093,9 +6427,7 @@ FrtIndexWriter *frt_iw_open(FrtStore *store, FrtAnalyzer *volatile analyzer,
|
|
6093
6427
|
/*******************/
|
6094
6428
|
/*** Add Indexes ***/
|
6095
6429
|
/*******************/
|
6096
|
-
static void iw_cp_fields(FrtIndexWriter *iw, SegmentReader *sr,
|
6097
|
-
const char *segment, int *map)
|
6098
|
-
{
|
6430
|
+
static void iw_cp_fields(FrtIndexWriter *iw, FrtSegmentReader *sr, const char *segment, int *map) {
|
6099
6431
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
6100
6432
|
FrtOutStream *fdt_out, *fdx_out;
|
6101
6433
|
FrtInStream *fdt_in, *fdx_in;
|
@@ -6122,7 +6454,6 @@ static void iw_cp_fields(FrtIndexWriter *iw, SegmentReader *sr,
|
|
6122
6454
|
frt_is2os_copy_bytes(del_in, del_out, frt_is_length(del_in));
|
6123
6455
|
}
|
6124
6456
|
|
6125
|
-
|
6126
6457
|
if (map) {
|
6127
6458
|
int i;
|
6128
6459
|
const int max_doc = sr_max_doc(IR(sr));
|
@@ -6143,10 +6474,14 @@ static void iw_cp_fields(FrtIndexWriter *iw, SegmentReader *sr,
|
|
6143
6474
|
frt_os_write_vint(fdt_out, df_size);
|
6144
6475
|
/* sum total lengths of FrtDocField */
|
6145
6476
|
for (k = 0; k < df_size; k++) {
|
6146
|
-
|
6147
|
-
const int
|
6477
|
+
const int flen = frt_is_read_vint(fdt_in); /* length */
|
6478
|
+
const int fenc = frt_is_read_vint(fdt_in); /* encoding */
|
6479
|
+
const int fcmp = frt_is_read_vint(fdt_in); /* compression */
|
6148
6480
|
frt_os_write_vint(fdt_out, flen);
|
6149
|
-
|
6481
|
+
frt_os_write_vint(fdt_out, fenc);
|
6482
|
+
frt_os_write_vint(fdt_out, fcmp);
|
6483
|
+
/* Each field has one ' ' byte so add 1 */
|
6484
|
+
data_len += flen + 1;
|
6150
6485
|
}
|
6151
6486
|
}
|
6152
6487
|
frt_is2os_copy_bytes(fdt_in, fdt_out, data_len);
|
@@ -6169,8 +6504,7 @@ static void iw_cp_fields(FrtIndexWriter *iw, SegmentReader *sr,
|
|
6169
6504
|
frt_os_write_vint(fdt_out, tv_size);
|
6170
6505
|
}
|
6171
6506
|
}
|
6172
|
-
}
|
6173
|
-
else {
|
6507
|
+
} else {
|
6174
6508
|
frt_is2os_copy_bytes(fdt_in, fdt_out, frt_is_length(fdt_in));
|
6175
6509
|
frt_is2os_copy_bytes(fdx_in, fdx_out, frt_is_length(fdx_in));
|
6176
6510
|
}
|
@@ -6180,7 +6514,7 @@ static void iw_cp_fields(FrtIndexWriter *iw, SegmentReader *sr,
|
|
6180
6514
|
frt_os_close(fdx_out);
|
6181
6515
|
}
|
6182
6516
|
|
6183
|
-
static void iw_cp_terms(FrtIndexWriter *iw, SegmentReader *sr,
|
6517
|
+
static void iw_cp_terms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
6184
6518
|
const char *segment, int *map)
|
6185
6519
|
{
|
6186
6520
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
@@ -6249,7 +6583,7 @@ static void iw_cp_terms(FrtIndexWriter *iw, SegmentReader *sr,
|
|
6249
6583
|
frt_os_close(prx_out);
|
6250
6584
|
}
|
6251
6585
|
|
6252
|
-
static void iw_cp_norms(FrtIndexWriter *iw, SegmentReader *sr,
|
6586
|
+
static void iw_cp_norms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
6253
6587
|
FrtSegmentInfo *si, int *map)
|
6254
6588
|
{
|
6255
6589
|
int i;
|
@@ -6280,9 +6614,7 @@ static void iw_cp_norms(FrtIndexWriter *iw, SegmentReader *sr,
|
|
6280
6614
|
}
|
6281
6615
|
}
|
6282
6616
|
|
6283
|
-
static void iw_cp_map_files(FrtIndexWriter *iw, SegmentReader *sr,
|
6284
|
-
FrtSegmentInfo *si)
|
6285
|
-
{
|
6617
|
+
static void iw_cp_map_files(FrtIndexWriter *iw, FrtSegmentReader *sr, FrtSegmentInfo *si) {
|
6286
6618
|
int i;
|
6287
6619
|
FrtFieldInfos *from_fis = IR(sr)->fis;
|
6288
6620
|
FrtFieldInfos *to_fis = iw->fis;
|
@@ -6300,15 +6632,13 @@ static void iw_cp_map_files(FrtIndexWriter *iw, SegmentReader *sr,
|
|
6300
6632
|
free(field_map);
|
6301
6633
|
}
|
6302
6634
|
|
6303
|
-
static void iw_cp_files(FrtIndexWriter *iw, SegmentReader *sr,
|
6304
|
-
FrtSegmentInfo *si)
|
6305
|
-
{
|
6635
|
+
static void iw_cp_files(FrtIndexWriter *iw, FrtSegmentReader *sr, FrtSegmentInfo *si) {
|
6306
6636
|
iw_cp_fields(iw, sr, si->name, NULL);
|
6307
6637
|
iw_cp_terms( iw, sr, si->name, NULL);
|
6308
6638
|
iw_cp_norms( iw, sr, si, NULL);
|
6309
6639
|
}
|
6310
6640
|
|
6311
|
-
static void iw_add_segment(FrtIndexWriter *iw, SegmentReader *sr)
|
6641
|
+
static void iw_add_segment(FrtIndexWriter *iw, FrtSegmentReader *sr)
|
6312
6642
|
{
|
6313
6643
|
FrtSegmentInfo *si = frt_sis_new_segment(iw->sis, 0, iw->store);
|
6314
6644
|
FrtFieldInfos *fis = iw->fis;
|
@@ -6323,7 +6653,7 @@ static void iw_add_segment(FrtIndexWriter *iw, SegmentReader *sr)
|
|
6323
6653
|
FrtFieldInfo *fi = sub_fis->fields[j];
|
6324
6654
|
FrtFieldInfo *new_fi = frt_fis_get_field(fis, fi->name);
|
6325
6655
|
if (NULL == new_fi) {
|
6326
|
-
new_fi = frt_fi_new(fi->name, FRT_STORE_NO, FRT_INDEX_NO, FRT_TERM_VECTOR_NO);
|
6656
|
+
new_fi = frt_fi_new(fi->name, FRT_STORE_NO, FRT_COMPRESSION_NONE, FRT_INDEX_NO, FRT_TERM_VECTOR_NO);
|
6327
6657
|
new_fi->bits = fi->bits;
|
6328
6658
|
frt_fis_add_field(fis, new_fi);
|
6329
6659
|
}
|