isomorfeus-ferret 0.12.5 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +101 -19
- data/README.md +54 -4
- data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
- data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
- data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
- data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
- data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
- data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
- data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
- data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
- data/ext/isomorfeus_ferret_ext/bzip_blocksort.c +1094 -0
- data/ext/isomorfeus_ferret_ext/bzip_huffman.c +205 -0
- data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
- data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
- data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
- data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
- data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
- data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
- data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
- data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
- data/ext/isomorfeus_ferret_ext/frb_index.c +513 -464
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
- data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
- data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
- data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
- data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
- data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
- data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
- data/ext/isomorfeus_ferret_ext/frt_document.h +10 -9
- data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
- data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +105 -63
- data/ext/isomorfeus_ferret_ext/frt_global.h +7 -3
- data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
- data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_index.c +714 -384
- data/ext/isomorfeus_ferret_ext/frt_index.h +274 -290
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +67 -91
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +26 -25
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +46 -84
- data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
- data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
- data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
- data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
- data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +21 -109
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
- data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
- data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
- data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
- data/ext/isomorfeus_ferret_ext/test.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
- data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
- data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
- data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_fields.c +111 -100
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
- data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
- data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
- data/ext/isomorfeus_ferret_ext/test_index.c +373 -363
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
- data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -62
- data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
- data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
- data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
- data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +113 -58
- data/ext/isomorfeus_ferret_ext/email.rl +0 -21
- data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
- data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
- data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
- data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
- data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
- data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
- data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
- data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
- data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
- data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -13,6 +13,7 @@
|
|
13
13
|
#include "frt_priorityqueue.h"
|
14
14
|
|
15
15
|
typedef struct FrtIndexReader FrtIndexReader;
|
16
|
+
typedef struct FrtSegmentReader FrtSegmentReader;
|
16
17
|
typedef struct FrtMultiReader FrtMultiReader;
|
17
18
|
typedef struct FrtDeleter FrtDeleter;
|
18
19
|
|
@@ -22,8 +23,7 @@ typedef struct FrtDeleter FrtDeleter;
|
|
22
23
|
*
|
23
24
|
****************************************************************************/
|
24
25
|
|
25
|
-
typedef struct FrtConfig
|
26
|
-
{
|
26
|
+
typedef struct FrtConfig {
|
27
27
|
int chunk_size;
|
28
28
|
int max_buffer_memory;
|
29
29
|
int index_interval;
|
@@ -52,8 +52,7 @@ typedef struct FrtCacheObject {
|
|
52
52
|
void (*destroy)(void *p);
|
53
53
|
} FrtCacheObject;
|
54
54
|
|
55
|
-
extern FrtCacheObject *frt_co_create(FrtHash *ref_tab1,
|
56
|
-
FrtHash *ref_tab2,
|
55
|
+
extern FrtCacheObject *frt_co_create(FrtHash *ref_tab1, FrtHash *ref_tab2,
|
57
56
|
void *ref1, void *ref2, frt_free_ft destroy, void *obj);
|
58
57
|
extern FrtHash *frt_co_hash_create();
|
59
58
|
|
@@ -63,14 +62,12 @@ extern FrtHash *frt_co_hash_create();
|
|
63
62
|
*
|
64
63
|
****************************************************************************/
|
65
64
|
|
66
|
-
typedef enum
|
67
|
-
{
|
65
|
+
typedef enum {
|
68
66
|
FRT_STORE_NO = 0,
|
69
|
-
FRT_STORE_YES = 1
|
67
|
+
FRT_STORE_YES = 1,
|
70
68
|
} FrtStoreValue;
|
71
69
|
|
72
|
-
typedef enum
|
73
|
-
{
|
70
|
+
typedef enum {
|
74
71
|
FRT_INDEX_NO = 0,
|
75
72
|
FRT_INDEX_UNTOKENIZED = 1,
|
76
73
|
FRT_INDEX_YES = 3,
|
@@ -78,8 +75,7 @@ typedef enum
|
|
78
75
|
FRT_INDEX_YES_OMIT_NORMS = 7
|
79
76
|
} FrtIndexValue;
|
80
77
|
|
81
|
-
typedef enum
|
82
|
-
{
|
78
|
+
typedef enum {
|
83
79
|
FRT_TERM_VECTOR_NO = 0,
|
84
80
|
FRT_TERM_VECTOR_YES = 1,
|
85
81
|
FRT_TERM_VECTOR_WITH_POSITIONS = 3,
|
@@ -87,37 +83,44 @@ typedef enum
|
|
87
83
|
FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS = 7
|
88
84
|
} FrtTermVectorValue;
|
89
85
|
|
90
|
-
#define FRT_FI_IS_STORED_BM
|
91
|
-
#define
|
92
|
-
#define
|
93
|
-
#define
|
94
|
-
#define
|
95
|
-
#define
|
96
|
-
#define
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
86
|
+
#define FRT_FI_IS_STORED_BM 0x001
|
87
|
+
#define FRT_FI_IS_COMPRESSED_BM 0x002
|
88
|
+
#define FRT_FI_IS_INDEXED_BM 0x004
|
89
|
+
#define FRT_FI_IS_TOKENIZED_BM 0x008
|
90
|
+
#define FRT_FI_OMIT_NORMS_BM 0x010
|
91
|
+
#define FRT_FI_STORE_TERM_VECTOR_BM 0x020
|
92
|
+
#define FRT_FI_STORE_POSITIONS_BM 0x040
|
93
|
+
#define FRT_FI_STORE_OFFSETS_BM 0x080
|
94
|
+
#define FRT_FI_COMPRESSION_BROTLI_BM 0x100
|
95
|
+
#define FRT_FI_COMPRESSION_BZ2_BM 0x200
|
96
|
+
#define FRT_FI_COMPRESSION_LZ4_BM 0x400
|
97
|
+
|
98
|
+
typedef struct FrtFieldInfo {
|
99
|
+
ID name;
|
100
|
+
float boost;
|
102
101
|
unsigned int bits;
|
103
|
-
int
|
104
|
-
int
|
102
|
+
int number;
|
103
|
+
int ref_cnt;
|
104
|
+
VALUE rfi;
|
105
105
|
} FrtFieldInfo;
|
106
106
|
|
107
|
-
extern FrtFieldInfo *
|
108
|
-
|
109
|
-
|
110
|
-
FrtTermVectorValue term_vector);
|
107
|
+
extern FrtFieldInfo *frt_fi_alloc();
|
108
|
+
extern FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
|
109
|
+
extern FrtFieldInfo *frt_fi_new(ID name, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
|
111
110
|
extern char *frt_fi_to_s(FrtFieldInfo *fi);
|
112
111
|
extern void frt_fi_deref(FrtFieldInfo *fi);
|
113
112
|
|
114
|
-
#define fi_is_stored(fi)
|
115
|
-
#define
|
116
|
-
#define
|
117
|
-
#define
|
118
|
-
#define
|
119
|
-
#define
|
120
|
-
#define
|
113
|
+
#define fi_is_stored(fi) (((fi)->bits & FRT_FI_IS_STORED_BM) != 0)
|
114
|
+
#define fi_is_compressed(fi) (((fi)->bits & FRT_FI_IS_COMPRESSED_BM) != 0)
|
115
|
+
#define fi_is_compressed_brotli(fi) (((fi)->bits & FRT_FI_COMPRESSION_BROTLI_BM) != 0)
|
116
|
+
#define fi_is_compressed_bz2(fi) (((fi)->bits & FRT_FI_COMPRESSION_BZ2_BM) != 0)
|
117
|
+
#define fi_is_compressed_lz4(fi) (((fi)->bits & FRT_FI_COMPRESSION_LZ4_BM) != 0)
|
118
|
+
#define fi_is_indexed(fi) (((fi)->bits & FRT_FI_IS_INDEXED_BM) != 0)
|
119
|
+
#define fi_is_tokenized(fi) (((fi)->bits & FRT_FI_IS_TOKENIZED_BM) != 0)
|
120
|
+
#define fi_omit_norms(fi) (((fi)->bits & FRT_FI_OMIT_NORMS_BM) != 0)
|
121
|
+
#define fi_store_term_vector(fi) (((fi)->bits & FRT_FI_STORE_TERM_VECTOR_BM) != 0)
|
122
|
+
#define fi_store_positions(fi) (((fi)->bits & FRT_FI_STORE_POSITIONS_BM) != 0)
|
123
|
+
#define fi_store_offsets(fi) (((fi)->bits & FRT_FI_STORE_OFFSETS_BM) != 0)
|
121
124
|
#define fi_has_norms(fi)\
|
122
125
|
(((fi)->bits & (FRT_FI_OMIT_NORMS_BM|FRT_FI_IS_INDEXED_BM)) == FRT_FI_IS_INDEXED_BM)
|
123
126
|
|
@@ -129,25 +132,26 @@ extern void frt_fi_deref(FrtFieldInfo *fi);
|
|
129
132
|
|
130
133
|
#define FIELD_INFOS_INIT_CAPA 4
|
131
134
|
/* carry changes over to dummy_fis in test/test_segments.c */
|
132
|
-
typedef struct FrtFieldInfos
|
133
|
-
|
134
|
-
|
135
|
-
FrtIndexValue
|
135
|
+
typedef struct FrtFieldInfos {
|
136
|
+
FrtStoreValue store;
|
137
|
+
FrtCompressionType compression;
|
138
|
+
FrtIndexValue index;
|
136
139
|
FrtTermVectorValue term_vector;
|
137
|
-
int
|
138
|
-
int
|
139
|
-
FrtFieldInfo
|
140
|
-
FrtHash
|
141
|
-
int
|
140
|
+
int size;
|
141
|
+
int capa;
|
142
|
+
FrtFieldInfo **fields;
|
143
|
+
FrtHash *field_dict;
|
144
|
+
int ref_cnt;
|
145
|
+
VALUE rfis;
|
142
146
|
} FrtFieldInfos;
|
143
147
|
|
144
|
-
FrtFieldInfos *
|
145
|
-
|
148
|
+
FrtFieldInfos *frt_fis_alloc();
|
149
|
+
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
|
150
|
+
FrtFieldInfos *frt_fis_new(FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
|
146
151
|
extern FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi);
|
147
|
-
extern FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis,
|
148
|
-
extern int frt_fis_get_field_num(FrtFieldInfos *fis,
|
149
|
-
extern FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis,
|
150
|
-
FrtSymbol name);
|
152
|
+
extern FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name);
|
153
|
+
extern int frt_fis_get_field_num(FrtFieldInfos *fis, ID name);
|
154
|
+
extern FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name);
|
151
155
|
extern void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os);
|
152
156
|
extern FrtFieldInfos *frt_fis_read(FrtInStream *is);
|
153
157
|
extern char *frt_fis_to_s(FrtFieldInfos *fis);
|
@@ -162,8 +166,7 @@ extern void frt_fis_deref(FrtFieldInfos *fis);
|
|
162
166
|
#define FRT_SEGMENT_NAME_MAX_LENGTH 100
|
163
167
|
#define FRT_SEGMENTS_FILE_NAME "segments"
|
164
168
|
|
165
|
-
typedef struct FrtSegmentInfo
|
166
|
-
{
|
169
|
+
typedef struct FrtSegmentInfo {
|
167
170
|
int ref_cnt;
|
168
171
|
char *name;
|
169
172
|
FrtStore *store;
|
@@ -186,8 +189,7 @@ extern void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num);
|
|
186
189
|
*
|
187
190
|
****************************************************************************/
|
188
191
|
|
189
|
-
typedef struct FrtSegmentInfos
|
190
|
-
{
|
192
|
+
typedef struct FrtSegmentInfos {
|
191
193
|
FrtFieldInfos *fis;
|
192
194
|
frt_u64 counter;
|
193
195
|
frt_u64 version;
|
@@ -199,10 +201,7 @@ typedef struct FrtSegmentInfos
|
|
199
201
|
int capa;
|
200
202
|
} FrtSegmentInfos;
|
201
203
|
|
202
|
-
extern char *frt_fn_for_generation(char *buf,
|
203
|
-
const char *base,
|
204
|
-
const char *ext,
|
205
|
-
frt_i64 gen);
|
204
|
+
extern char *frt_fn_for_generation(char *buf, const char *base, const char *ext, frt_i64 gen);
|
206
205
|
|
207
206
|
extern FrtSegmentInfos *frt_sis_new(FrtFieldInfos *fis);
|
208
207
|
extern FrtSegmentInfo *frt_sis_new_segment(FrtSegmentInfos *sis, int dcnt, FrtStore *store);
|
@@ -223,9 +222,8 @@ extern void frt_sis_put(FrtSegmentInfos *sis, FILE *stream);
|
|
223
222
|
*
|
224
223
|
****************************************************************************/
|
225
224
|
|
226
|
-
typedef struct FrtTermInfo
|
227
|
-
|
228
|
-
int doc_freq;
|
225
|
+
typedef struct FrtTermInfo {
|
226
|
+
int doc_freq;
|
229
227
|
off_t frq_ptr;
|
230
228
|
off_t prx_ptr;
|
231
229
|
off_t skip_offset;
|
@@ -239,24 +237,21 @@ typedef struct FrtTermInfo
|
|
239
237
|
} while (0)
|
240
238
|
|
241
239
|
/****************************************************************************
|
242
|
-
*
|
243
240
|
* FrtTermEnum
|
244
|
-
*
|
245
241
|
****************************************************************************/
|
246
242
|
|
247
243
|
typedef struct FrtTermEnum FrtTermEnum;
|
248
244
|
|
249
|
-
struct FrtTermEnum
|
250
|
-
{
|
245
|
+
struct FrtTermEnum {
|
251
246
|
char curr_term[FRT_MAX_WORD_SIZE];
|
252
247
|
char prev_term[FRT_MAX_WORD_SIZE];
|
253
|
-
FrtTermInfo
|
248
|
+
FrtTermInfo curr_ti;
|
254
249
|
int curr_term_len;
|
255
250
|
int field_num;
|
256
251
|
FrtTermEnum *(*set_field)(FrtTermEnum *te, int field_num);
|
257
|
-
char
|
258
|
-
char
|
259
|
-
void
|
252
|
+
char *(*next)(FrtTermEnum *te);
|
253
|
+
char *(*skip_to)(FrtTermEnum *te, const char *term);
|
254
|
+
void (*close)(FrtTermEnum *te);
|
260
255
|
FrtTermEnum *(*clone)(FrtTermEnum *te);
|
261
256
|
};
|
262
257
|
|
@@ -264,59 +259,54 @@ char *frt_te_get_term(struct FrtTermEnum *te);
|
|
264
259
|
FrtTermInfo *frt_te_get_ti(struct FrtTermEnum *te);
|
265
260
|
|
266
261
|
/****************************************************************************
|
267
|
-
*
|
268
262
|
* FrtSegmentTermEnum
|
269
|
-
*
|
270
263
|
****************************************************************************/
|
271
264
|
|
272
|
-
/*
|
265
|
+
/* FrtSegmentTermIndex */
|
273
266
|
|
274
|
-
typedef struct FrtSegmentTermIndex
|
275
|
-
{
|
267
|
+
typedef struct FrtSegmentTermIndex {
|
276
268
|
off_t index_ptr;
|
277
269
|
off_t ptr;
|
278
270
|
int index_cnt;
|
279
271
|
int size;
|
280
|
-
char
|
281
|
-
int
|
282
|
-
FrtTermInfo
|
283
|
-
off_t
|
272
|
+
char **index_terms;
|
273
|
+
int *index_term_lens;
|
274
|
+
FrtTermInfo *index_term_infos;
|
275
|
+
off_t *index_ptrs;
|
284
276
|
} FrtSegmentTermIndex;
|
285
277
|
|
286
|
-
/*
|
278
|
+
/* FrtSegmentFieldIndex */
|
287
279
|
|
288
|
-
typedef struct
|
289
|
-
|
290
|
-
typedef struct FrtSegmentFieldIndex
|
291
|
-
{
|
292
|
-
frt_mutex_t mutex;
|
280
|
+
typedef struct FrtSegmentFieldIndex {
|
281
|
+
frt_mutex_t mutex;
|
293
282
|
int skip_interval;
|
294
283
|
int index_interval;
|
295
284
|
off_t index_ptr;
|
296
|
-
FrtTermEnum
|
297
|
-
FrtHash
|
285
|
+
FrtTermEnum *index_te;
|
286
|
+
FrtHash *field_dict;
|
298
287
|
} FrtSegmentFieldIndex;
|
299
288
|
|
300
|
-
|
301
|
-
extern void frt_sfi_close(FrtSegmentFieldIndex *sfi);
|
289
|
+
/* FrtSegmentTermEnum */
|
302
290
|
|
291
|
+
typedef struct FrtSegmentTermEnum FrtSegmentTermEnum;
|
303
292
|
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
FrtTermEnum te;
|
308
|
-
FrtInStream *is;
|
293
|
+
struct FrtSegmentTermEnum {
|
294
|
+
FrtTermEnum te;
|
295
|
+
FrtInStream *is;
|
309
296
|
int size;
|
310
297
|
int pos;
|
311
298
|
int skip_interval;
|
312
299
|
FrtSegmentFieldIndex *sfi;
|
313
300
|
};
|
314
301
|
|
302
|
+
extern FrtSegmentFieldIndex *frt_sfi_open(FrtStore *store, const char *segment);
|
303
|
+
extern void frt_sfi_close(FrtSegmentFieldIndex *sfi);
|
304
|
+
|
315
305
|
extern void frt_ste_close(FrtTermEnum *te);
|
316
306
|
extern FrtTermEnum *frt_ste_clone(FrtTermEnum *te);
|
317
307
|
extern FrtTermEnum *frt_ste_new(FrtInStream *is, FrtSegmentFieldIndex *sfi);
|
318
308
|
|
319
|
-
/*
|
309
|
+
/* MultiTermEnum */
|
320
310
|
|
321
311
|
extern FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *term);
|
322
312
|
|
@@ -326,17 +316,14 @@ extern FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *t
|
|
326
316
|
*
|
327
317
|
****************************************************************************/
|
328
318
|
|
329
|
-
typedef struct FrtTermInfosReader
|
330
|
-
{
|
319
|
+
typedef struct FrtTermInfosReader {
|
331
320
|
frt_thread_key_t thread_te;
|
332
|
-
void
|
333
|
-
FrtTermEnum
|
334
|
-
int
|
321
|
+
void **te_bucket;
|
322
|
+
FrtTermEnum *orig_te;
|
323
|
+
int field_num;
|
335
324
|
} FrtTermInfosReader;
|
336
325
|
|
337
|
-
extern FrtTermInfosReader *frt_tir_open(FrtStore *store,
|
338
|
-
FrtSegmentFieldIndex *sfi,
|
339
|
-
const char *segment);
|
326
|
+
extern FrtTermInfosReader *frt_tir_open(FrtStore *store, FrtSegmentFieldIndex *sfi, const char *segment);
|
340
327
|
extern FrtTermInfosReader *frt_tir_set_field(FrtTermInfosReader *tir, int field_num);
|
341
328
|
extern FrtTermInfo *frt_tir_get_ti(FrtTermInfosReader *tir, const char *term);
|
342
329
|
extern char *frt_tir_get_term(FrtTermInfosReader *tir, int pos);
|
@@ -351,34 +338,26 @@ extern void frt_tir_close(FrtTermInfosReader *tir);
|
|
351
338
|
#define FRT_INDEX_INTERVAL 128
|
352
339
|
#define FRT_SKIP_INTERVAL 16
|
353
340
|
|
354
|
-
typedef struct FrtTermWriter
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
FrtTermInfo last_term_info;
|
341
|
+
typedef struct FrtTermWriter {
|
342
|
+
int counter;
|
343
|
+
const char *last_term;
|
344
|
+
FrtTermInfo last_term_info;
|
359
345
|
FrtOutStream *os;
|
360
346
|
} FrtTermWriter;
|
361
347
|
|
362
|
-
typedef struct FrtTermInfosWriter
|
363
|
-
|
364
|
-
int
|
365
|
-
int
|
366
|
-
|
367
|
-
|
368
|
-
FrtOutStream *tfx_out;
|
348
|
+
typedef struct FrtTermInfosWriter {
|
349
|
+
int field_count;
|
350
|
+
int index_interval;
|
351
|
+
int skip_interval;
|
352
|
+
off_t last_index_ptr;
|
353
|
+
FrtOutStream *tfx_out;
|
369
354
|
FrtTermWriter *tix_writer;
|
370
355
|
FrtTermWriter *tis_writer;
|
371
356
|
} FrtTermInfosWriter;
|
372
357
|
|
373
|
-
extern FrtTermInfosWriter *frt_tiw_open(FrtStore *store,
|
374
|
-
const char *segment,
|
375
|
-
int index_interval,
|
376
|
-
int skip_interval);
|
358
|
+
extern FrtTermInfosWriter *frt_tiw_open(FrtStore *store, const char *segment, int index_interval, int skip_interval);
|
377
359
|
extern void frt_tiw_start_field(FrtTermInfosWriter *tiw, int field_num);
|
378
|
-
extern void frt_tiw_add(FrtTermInfosWriter *tiw,
|
379
|
-
const char *term,
|
380
|
-
int t_len,
|
381
|
-
FrtTermInfo *ti);
|
360
|
+
extern void frt_tiw_add(FrtTermInfosWriter *tiw, const char *term, int t_len, FrtTermInfo *ti);
|
382
361
|
extern void frt_tiw_close(FrtTermInfosWriter *tiw);
|
383
362
|
|
384
363
|
/****************************************************************************
|
@@ -388,8 +367,7 @@ extern void frt_tiw_close(FrtTermInfosWriter *tiw);
|
|
388
367
|
****************************************************************************/
|
389
368
|
|
390
369
|
typedef struct FrtTermDocEnum FrtTermDocEnum;
|
391
|
-
struct FrtTermDocEnum
|
392
|
-
{
|
370
|
+
struct FrtTermDocEnum {
|
393
371
|
void (*seek)(FrtTermDocEnum *tde, int field_num, const char *term);
|
394
372
|
void (*seek_te)(FrtTermDocEnum *tde, FrtTermEnum *te);
|
395
373
|
void (*seek_ti)(FrtTermDocEnum *tde, FrtTermInfo *ti);
|
@@ -405,8 +383,7 @@ struct FrtTermDocEnum
|
|
405
383
|
/* * FrtSegmentTermDocEnum * */
|
406
384
|
|
407
385
|
typedef struct FrtSegmentTermDocEnum FrtSegmentTermDocEnum;
|
408
|
-
struct FrtSegmentTermDocEnum
|
409
|
-
{
|
386
|
+
struct FrtSegmentTermDocEnum {
|
410
387
|
FrtTermDocEnum tde;
|
411
388
|
void (*seek_prox)(FrtSegmentTermDocEnum *stde, off_t prx_ptr);
|
412
389
|
void (*skip_prox)(FrtSegmentTermDocEnum *stde);
|
@@ -443,8 +420,7 @@ extern FrtTermDocEnum *frt_stpe_new(FrtTermInfosReader *tir, FrtInStream *frq_in
|
|
443
420
|
* MultipleTermDocPosEnum
|
444
421
|
****************************************************************************/
|
445
422
|
|
446
|
-
extern FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms,
|
447
|
-
int t_cnt);
|
423
|
+
extern FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms, int t_cnt);
|
448
424
|
|
449
425
|
/****************************************************************************
|
450
426
|
*
|
@@ -452,8 +428,7 @@ extern FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **t
|
|
452
428
|
*
|
453
429
|
****************************************************************************/
|
454
430
|
|
455
|
-
typedef struct FrtOffset
|
456
|
-
{
|
431
|
+
typedef struct FrtOffset {
|
457
432
|
off_t start;
|
458
433
|
off_t end;
|
459
434
|
} FrtOffset;
|
@@ -464,8 +439,7 @@ typedef struct FrtOffset
|
|
464
439
|
*
|
465
440
|
****************************************************************************/
|
466
441
|
|
467
|
-
typedef struct FrtOccurence
|
468
|
-
{
|
442
|
+
typedef struct FrtOccurence {
|
469
443
|
struct FrtOccurence *next;
|
470
444
|
int pos;
|
471
445
|
} FrtOccurence;
|
@@ -476,8 +450,7 @@ typedef struct FrtOccurence
|
|
476
450
|
*
|
477
451
|
****************************************************************************/
|
478
452
|
|
479
|
-
typedef struct FrtPosting
|
480
|
-
{
|
453
|
+
typedef struct FrtPosting {
|
481
454
|
int freq;
|
482
455
|
int doc_num;
|
483
456
|
FrtOccurence *first_occ;
|
@@ -492,17 +465,15 @@ extern FrtPosting *frt_p_new(FrtMemoryPool *mp, int doc_num, int pos);
|
|
492
465
|
*
|
493
466
|
****************************************************************************/
|
494
467
|
|
495
|
-
typedef struct FrtPostingList
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
FrtPosting
|
500
|
-
FrtPosting *last;
|
468
|
+
typedef struct FrtPostingList {
|
469
|
+
const char *term;
|
470
|
+
int term_len;
|
471
|
+
FrtPosting *first;
|
472
|
+
FrtPosting *last;
|
501
473
|
FrtOccurence *last_occ;
|
502
474
|
} FrtPostingList;
|
503
475
|
|
504
|
-
extern FrtPostingList *frt_pl_new(FrtMemoryPool *mp, const char *term,
|
505
|
-
int term_len, FrtPosting *p);
|
476
|
+
extern FrtPostingList *frt_pl_new(FrtMemoryPool *mp, const char *term, int term_len, FrtPosting *p);
|
506
477
|
extern void frt_pl_add_occ(FrtMemoryPool *mp, FrtPostingList *pl, int pos);
|
507
478
|
extern int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2);
|
508
479
|
|
@@ -512,8 +483,7 @@ extern int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2);
|
|
512
483
|
*
|
513
484
|
****************************************************************************/
|
514
485
|
|
515
|
-
typedef struct FrtTVField
|
516
|
-
{
|
486
|
+
typedef struct FrtTVField {
|
517
487
|
int field_num;
|
518
488
|
int size;
|
519
489
|
} FrtTVField;
|
@@ -524,11 +494,10 @@ typedef struct FrtTVField
|
|
524
494
|
*
|
525
495
|
****************************************************************************/
|
526
496
|
|
527
|
-
typedef struct FrtTVTerm
|
528
|
-
|
529
|
-
|
530
|
-
int
|
531
|
-
int *positions;
|
497
|
+
typedef struct FrtTVTerm {
|
498
|
+
char *text;
|
499
|
+
int freq;
|
500
|
+
int *positions;
|
532
501
|
} FrtTVTerm;
|
533
502
|
|
534
503
|
/****************************************************************************
|
@@ -538,10 +507,9 @@ typedef struct FrtTVTerm
|
|
538
507
|
****************************************************************************/
|
539
508
|
|
540
509
|
#define FRT_TV_FIELD_INIT_CAPA 8
|
541
|
-
typedef struct FrtTermVector
|
542
|
-
{
|
510
|
+
typedef struct FrtTermVector {
|
543
511
|
int field_num;
|
544
|
-
|
512
|
+
ID field;
|
545
513
|
int term_cnt;
|
546
514
|
FrtTVTerm *terms;
|
547
515
|
int offset_cnt;
|
@@ -560,38 +528,38 @@ extern FrtTVTerm *frt_tv_get_tv_term(FrtTermVector *tv, const char *term);
|
|
560
528
|
****************************************************************************/
|
561
529
|
|
562
530
|
/* * * FrtLazyDocField * * */
|
563
|
-
typedef struct FrtLazyDocFieldData
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
531
|
+
typedef struct FrtLazyDocFieldData {
|
532
|
+
off_t start;
|
533
|
+
int length;
|
534
|
+
rb_encoding *encoding;
|
535
|
+
FrtCompressionType compression; /* as stored */
|
536
|
+
char *text;
|
568
537
|
} FrtLazyDocFieldData;
|
569
538
|
|
570
539
|
typedef struct FrtLazyDoc FrtLazyDoc;
|
571
|
-
typedef struct FrtLazyDocField
|
572
|
-
|
573
|
-
FrtSymbol name;
|
540
|
+
typedef struct FrtLazyDocField {
|
541
|
+
ID name;
|
574
542
|
FrtLazyDocFieldData *data;
|
575
543
|
FrtLazyDoc *doc;
|
576
544
|
int size; /* number of data elements */
|
577
545
|
int len; /* length of data elements concatenated */
|
546
|
+
FrtCompressionType compression; /* as configured */
|
547
|
+
bool decompressed;
|
578
548
|
} FrtLazyDocField;
|
579
549
|
|
580
550
|
extern char *frt_lazy_df_get_data(FrtLazyDocField *self, int i);
|
581
|
-
extern void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf,
|
582
|
-
int start, int len);
|
551
|
+
extern void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf, int start, int len);
|
583
552
|
|
584
553
|
/* * * FrtLazyDoc * * */
|
585
|
-
struct FrtLazyDoc
|
586
|
-
|
587
|
-
|
588
|
-
int size;
|
554
|
+
struct FrtLazyDoc {
|
555
|
+
FrtHash *field_dictionary;
|
556
|
+
int size;
|
589
557
|
FrtLazyDocField **fields;
|
590
|
-
FrtInStream
|
558
|
+
FrtInStream *fields_in;
|
591
559
|
};
|
592
560
|
|
593
561
|
extern void frt_lazy_doc_close(FrtLazyDoc *self);
|
594
|
-
extern FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self,
|
562
|
+
extern FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self, ID field);
|
595
563
|
|
596
564
|
/****************************************************************************
|
597
565
|
*
|
@@ -599,8 +567,7 @@ extern FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self, FrtSymbol field);
|
|
599
567
|
*
|
600
568
|
****************************************************************************/
|
601
569
|
|
602
|
-
typedef struct FrtFieldsReader
|
603
|
-
{
|
570
|
+
typedef struct FrtFieldsReader {
|
604
571
|
int size;
|
605
572
|
FrtFieldInfos *fis;
|
606
573
|
FrtStore *store;
|
@@ -608,15 +575,13 @@ typedef struct FrtFieldsReader
|
|
608
575
|
FrtInStream *fdt_in;
|
609
576
|
} FrtFieldsReader;
|
610
577
|
|
611
|
-
extern FrtFieldsReader *frt_fr_open(FrtStore *store,
|
612
|
-
const char *segment, FrtFieldInfos *fis);
|
578
|
+
extern FrtFieldsReader *frt_fr_open(FrtStore *store, const char *segment, FrtFieldInfos *fis);
|
613
579
|
extern FrtFieldsReader *frt_fr_clone(FrtFieldsReader *orig);
|
614
580
|
extern void frt_fr_close(FrtFieldsReader *fr);
|
615
581
|
extern FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num);
|
616
582
|
extern FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num);
|
617
583
|
extern FrtHash *frt_fr_get_tv(FrtFieldsReader *fr, int doc_num);
|
618
|
-
extern FrtTermVector *frt_fr_get_field_tv(FrtFieldsReader *fr, int doc_num,
|
619
|
-
int field_num);
|
584
|
+
extern FrtTermVector *frt_fr_get_field_tv(FrtFieldsReader *fr, int doc_num, int field_num);
|
620
585
|
|
621
586
|
/****************************************************************************
|
622
587
|
*
|
@@ -624,18 +589,16 @@ extern FrtTermVector *frt_fr_get_field_tv(FrtFieldsReader *fr, int doc_num,
|
|
624
589
|
*
|
625
590
|
****************************************************************************/
|
626
591
|
|
627
|
-
typedef struct FrtFieldsWriter
|
628
|
-
{
|
592
|
+
typedef struct FrtFieldsWriter {
|
629
593
|
FrtFieldInfos *fis;
|
630
594
|
FrtOutStream *fdt_out;
|
631
595
|
FrtOutStream *fdx_out;
|
632
596
|
FrtOutStream *buffer;
|
633
597
|
FrtTVField *tv_fields;
|
634
|
-
off_t
|
598
|
+
off_t start_ptr;
|
635
599
|
} FrtFieldsWriter;
|
636
600
|
|
637
|
-
extern FrtFieldsWriter *frt_fw_open(FrtStore *store,
|
638
|
-
const char *segment, FrtFieldInfos *fis);
|
601
|
+
extern FrtFieldsWriter *frt_fw_open(FrtStore *store, const char *segment, FrtFieldInfos *fis);
|
639
602
|
extern void frt_fw_close(FrtFieldsWriter *fw);
|
640
603
|
extern void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc);
|
641
604
|
extern void frt_fw_add_postings(FrtFieldsWriter *fw,
|
@@ -656,11 +619,10 @@ extern void frt_fw_write_tv_index(FrtFieldsWriter *fw);
|
|
656
619
|
*
|
657
620
|
****************************************************************************/
|
658
621
|
|
659
|
-
struct FrtDeleter
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
FrtHashSet *pending;
|
622
|
+
struct FrtDeleter {
|
623
|
+
FrtStore *store;
|
624
|
+
FrtSegmentInfos *sis;
|
625
|
+
FrtHashSet *pending;
|
664
626
|
};
|
665
627
|
|
666
628
|
extern FrtDeleter *frt_deleter_new(FrtSegmentInfos *sis, FrtStore *store);
|
@@ -678,88 +640,115 @@ extern void frt_deleter_delete_files(FrtDeleter *dlr, char **files, int file_cnt
|
|
678
640
|
#define FRT_WRITE_LOCK_NAME "write"
|
679
641
|
#define FRT_COMMIT_LOCK_NAME "commit"
|
680
642
|
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
void
|
706
|
-
void
|
707
|
-
void
|
708
|
-
|
709
|
-
|
710
|
-
void
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
FrtHash
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
bool
|
724
|
-
bool
|
643
|
+
typedef enum {
|
644
|
+
FRT_INDEX_READER,
|
645
|
+
FRT_SEGMENT_READER,
|
646
|
+
FRT_MULTI_READER
|
647
|
+
} frt_index_reader_t;
|
648
|
+
|
649
|
+
struct FrtIndexReader {
|
650
|
+
int type;
|
651
|
+
int (*num_docs)(FrtIndexReader *ir);
|
652
|
+
int (*max_doc)(FrtIndexReader *ir);
|
653
|
+
FrtDocument *(*get_doc)(FrtIndexReader *ir, int doc_num);
|
654
|
+
FrtLazyDoc *(*get_lazy_doc)(FrtIndexReader *ir, int doc_num);
|
655
|
+
frt_uchar *(*get_norms)(FrtIndexReader *ir, int field_num);
|
656
|
+
frt_uchar *(*get_norms_into)(FrtIndexReader *ir, int field_num, frt_uchar *buf);
|
657
|
+
FrtTermEnum *(*terms)(FrtIndexReader *ir, int field_num);
|
658
|
+
FrtTermEnum *(*terms_from)(FrtIndexReader *ir, int field_num, const char *term);
|
659
|
+
int (*doc_freq)(FrtIndexReader *ir, int field_num, const char *term);
|
660
|
+
FrtTermDocEnum *(*term_docs)(FrtIndexReader *ir);
|
661
|
+
FrtTermDocEnum *(*term_positions)(FrtIndexReader *ir);
|
662
|
+
FrtTermVector *(*term_vector)(FrtIndexReader *ir, int doc_num, ID field);
|
663
|
+
FrtHash *(*term_vectors)(FrtIndexReader *ir, int doc_num);
|
664
|
+
bool (*is_deleted)(FrtIndexReader *ir, int doc_num);
|
665
|
+
bool (*has_deletions)(FrtIndexReader *ir);
|
666
|
+
void (*acquire_write_lock)(FrtIndexReader *ir);
|
667
|
+
void (*set_norm_i)(FrtIndexReader *ir, int doc_num, int field_num, frt_uchar val);
|
668
|
+
void (*delete_doc_i)(FrtIndexReader *ir, int doc_num);
|
669
|
+
void (*undelete_all_i)(FrtIndexReader *ir);
|
670
|
+
void (*set_deleter_i)(FrtIndexReader *ir, FrtDeleter *dlr);
|
671
|
+
bool (*is_latest_i)(FrtIndexReader *ir);
|
672
|
+
void (*commit_i)(FrtIndexReader *ir);
|
673
|
+
void (*close_i)(FrtIndexReader *ir);
|
674
|
+
int ref_cnt;
|
675
|
+
FrtDeleter *deleter;
|
676
|
+
FrtStore *store;
|
677
|
+
FrtLock *write_lock;
|
678
|
+
FrtSegmentInfos *sis;
|
679
|
+
FrtFieldInfos *fis;
|
680
|
+
FrtHash *cache;
|
681
|
+
FrtHash *field_index_cache;
|
682
|
+
frt_mutex_t field_index_mutex;
|
683
|
+
frt_uchar *fake_norms;
|
684
|
+
frt_mutex_t mutex;
|
685
|
+
bool has_changes : 1;
|
686
|
+
bool is_stale : 1;
|
687
|
+
bool is_owner : 1;
|
688
|
+
VALUE rir;
|
725
689
|
};
|
726
690
|
|
727
|
-
extern FrtIndexReader *frt_ir_open(FrtStore *store);
|
691
|
+
extern FrtIndexReader *frt_ir_open(FrtIndexReader *ir, FrtStore *store);
|
728
692
|
extern void frt_ir_close(FrtIndexReader *ir);
|
729
693
|
extern void frt_ir_commit(FrtIndexReader *ir);
|
730
694
|
extern void frt_ir_delete_doc(FrtIndexReader *ir, int doc_num);
|
731
695
|
extern void frt_ir_undelete_all(FrtIndexReader *ir);
|
732
|
-
extern int frt_ir_doc_freq(FrtIndexReader *ir,
|
733
|
-
extern void frt_ir_set_norm(FrtIndexReader *ir, int doc_num,
|
696
|
+
extern int frt_ir_doc_freq(FrtIndexReader *ir, ID field, const char *term);
|
697
|
+
extern void frt_ir_set_norm(FrtIndexReader *ir, int doc_num, ID field, frt_uchar val);
|
734
698
|
extern frt_uchar *frt_ir_get_norms_i(FrtIndexReader *ir, int field_num);
|
735
|
-
extern frt_uchar *frt_ir_get_norms(FrtIndexReader *ir,
|
736
|
-
extern frt_uchar *frt_ir_get_norms_into(FrtIndexReader *ir,
|
737
|
-
extern FrtDocument *frt_ir_get_doc_with_term(FrtIndexReader *ir,
|
738
|
-
extern FrtTermEnum *frt_ir_terms(FrtIndexReader *ir,
|
739
|
-
extern FrtTermEnum *frt_ir_terms_from(FrtIndexReader *ir,
|
740
|
-
extern FrtTermDocEnum *ir_term_docs_for(FrtIndexReader *ir,
|
741
|
-
extern FrtTermDocEnum *frt_ir_term_positions_for(FrtIndexReader *ir,
|
699
|
+
extern frt_uchar *frt_ir_get_norms(FrtIndexReader *ir, ID field);
|
700
|
+
extern frt_uchar *frt_ir_get_norms_into(FrtIndexReader *ir, ID field, frt_uchar *buf);
|
701
|
+
extern FrtDocument *frt_ir_get_doc_with_term(FrtIndexReader *ir, ID field, const char *term);
|
702
|
+
extern FrtTermEnum *frt_ir_terms(FrtIndexReader *ir, ID field);
|
703
|
+
extern FrtTermEnum *frt_ir_terms_from(FrtIndexReader *ir, ID field, const char *t);
|
704
|
+
extern FrtTermDocEnum *ir_term_docs_for(FrtIndexReader *ir, ID field, const char *term);
|
705
|
+
extern FrtTermDocEnum *frt_ir_term_positions_for(FrtIndexReader *ir, ID field, const char *t);
|
742
706
|
extern void frt_ir_add_cache(FrtIndexReader *ir);
|
743
707
|
extern bool frt_ir_is_latest(FrtIndexReader *ir);
|
744
708
|
|
709
|
+
/****************************************************************************
|
710
|
+
* FrtSegmentReader
|
711
|
+
****************************************************************************/
|
712
|
+
|
713
|
+
struct FrtSegmentReader {
|
714
|
+
FrtIndexReader ir;
|
715
|
+
FrtSegmentInfo *si;
|
716
|
+
char *segment;
|
717
|
+
FrtFieldsReader *fr;
|
718
|
+
FrtBitVector *deleted_docs;
|
719
|
+
FrtInStream *frq_in;
|
720
|
+
FrtInStream *prx_in;
|
721
|
+
FrtSegmentFieldIndex *sfi;
|
722
|
+
FrtTermInfosReader *tir;
|
723
|
+
frt_thread_key_t thread_fr;
|
724
|
+
void **fr_bucket;
|
725
|
+
FrtHash *norms;
|
726
|
+
FrtStore *cfs_store;
|
727
|
+
bool deleted_docs_dirty : 1;
|
728
|
+
bool undelete_all : 1;
|
729
|
+
bool norms_dirty : 1;
|
730
|
+
};
|
731
|
+
|
732
|
+
extern FrtSegmentReader *frt_sr_alloc();
|
733
|
+
|
745
734
|
/****************************************************************************
|
746
735
|
* FrtMultiReader
|
747
736
|
****************************************************************************/
|
748
737
|
|
749
738
|
struct FrtMultiReader {
|
750
739
|
FrtIndexReader ir;
|
751
|
-
int
|
752
|
-
int
|
753
|
-
int
|
754
|
-
int
|
740
|
+
int max_doc;
|
741
|
+
int num_docs_cache;
|
742
|
+
int r_cnt;
|
743
|
+
int *starts;
|
755
744
|
FrtIndexReader **sub_readers;
|
756
|
-
FrtHash
|
757
|
-
bool
|
758
|
-
int
|
745
|
+
FrtHash *norms_cache;
|
746
|
+
bool has_deletions : 1;
|
747
|
+
int **field_num_map;
|
759
748
|
};
|
760
749
|
|
761
750
|
extern int frt_mr_get_field_num(FrtMultiReader *mr, int ir_num, int f_num);
|
762
|
-
extern FrtIndexReader *frt_mr_open(FrtIndexReader **sub_readers, const int r_cnt);
|
751
|
+
extern FrtIndexReader *frt_mr_open(FrtIndexReader *ir, FrtIndexReader **sub_readers, const int r_cnt);
|
763
752
|
|
764
753
|
/****************************************************************************
|
765
754
|
*
|
@@ -767,16 +756,15 @@ extern FrtIndexReader *frt_mr_open(FrtIndexReader **sub_readers, const int r_cnt
|
|
767
756
|
*
|
768
757
|
****************************************************************************/
|
769
758
|
|
770
|
-
typedef struct FrtFieldInverter
|
771
|
-
|
772
|
-
|
773
|
-
frt_uchar *norms;
|
759
|
+
typedef struct FrtFieldInverter {
|
760
|
+
FrtHash *plists;
|
761
|
+
frt_uchar *norms;
|
774
762
|
FrtFieldInfo *fi;
|
775
|
-
int
|
776
|
-
bool
|
777
|
-
bool
|
778
|
-
bool
|
779
|
-
bool
|
763
|
+
int length;
|
764
|
+
bool is_tokenized : 1;
|
765
|
+
bool store_term_vector : 1;
|
766
|
+
bool store_offsets : 1;
|
767
|
+
bool has_norms : 1;
|
780
768
|
} FrtFieldInverter;
|
781
769
|
|
782
770
|
/****************************************************************************
|
@@ -788,18 +776,17 @@ typedef struct FrtFieldInverter
|
|
788
776
|
#define DW_OFFSET_INIT_CAPA 512
|
789
777
|
typedef struct FrtIndexWriter FrtIndexWriter;
|
790
778
|
|
791
|
-
typedef struct FrtDocWriter
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
FrtFieldInfos *fis;
|
779
|
+
typedef struct FrtDocWriter {
|
780
|
+
FrtStore *store;
|
781
|
+
FrtSegmentInfo *si;
|
782
|
+
FrtFieldInfos *fis;
|
796
783
|
FrtFieldsWriter *fw;
|
797
|
-
FrtMemoryPool
|
798
|
-
FrtAnalyzer
|
799
|
-
FrtHash
|
800
|
-
FrtHash
|
801
|
-
FrtSimilarity
|
802
|
-
FrtOffset
|
784
|
+
FrtMemoryPool *mp;
|
785
|
+
FrtAnalyzer *analyzer;
|
786
|
+
FrtHash *curr_plists;
|
787
|
+
FrtHash *fields;
|
788
|
+
FrtSimilarity *similarity;
|
789
|
+
FrtOffset *offsets;
|
803
790
|
int offsets_size;
|
804
791
|
int offsets_capa;
|
805
792
|
int doc_num;
|
@@ -814,9 +801,7 @@ extern void frt_dw_close(FrtDocWriter *dw);
|
|
814
801
|
extern void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc);
|
815
802
|
extern void frt_dw_new_segment(FrtDocWriter *dw, FrtSegmentInfo *si);
|
816
803
|
/* For testing. need to remove somehow. FIXME */
|
817
|
-
extern FrtHash *frt_dw_invert_field(FrtDocWriter *dw,
|
818
|
-
FrtFieldInverter *fld_inv,
|
819
|
-
FrtDocField *df);
|
804
|
+
extern FrtHash *frt_dw_invert_field(FrtDocWriter *dw, FrtFieldInverter *fld_inv, FrtDocField *df);
|
820
805
|
extern FrtFieldInverter *frt_dw_get_fld_inv(FrtDocWriter *dw, FrtFieldInfo *fi);
|
821
806
|
extern void frt_dw_reset_postings(FrtHash *postings);
|
822
807
|
|
@@ -826,25 +811,25 @@ extern void frt_dw_reset_postings(FrtHash *postings);
|
|
826
811
|
*
|
827
812
|
****************************************************************************/
|
828
813
|
|
829
|
-
struct FrtIndexWriter
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
FrtAnalyzer *analyzer;
|
814
|
+
struct FrtIndexWriter {
|
815
|
+
FrtConfig config;
|
816
|
+
frt_mutex_t mutex;
|
817
|
+
FrtStore *store;
|
818
|
+
FrtAnalyzer *analyzer;
|
835
819
|
FrtSegmentInfos *sis;
|
836
|
-
FrtFieldInfos
|
837
|
-
FrtDocWriter
|
838
|
-
FrtSimilarity
|
839
|
-
FrtLock
|
840
|
-
FrtDeleter
|
820
|
+
FrtFieldInfos *fis;
|
821
|
+
FrtDocWriter *dw;
|
822
|
+
FrtSimilarity *similarity;
|
823
|
+
FrtLock *write_lock;
|
824
|
+
FrtDeleter *deleter;
|
841
825
|
};
|
842
826
|
|
843
827
|
extern void frt_index_create(FrtStore *store, FrtFieldInfos *fis);
|
844
828
|
extern bool frt_index_is_locked(FrtStore *store);
|
845
|
-
extern FrtIndexWriter *
|
846
|
-
extern
|
847
|
-
extern void
|
829
|
+
extern FrtIndexWriter *frt_iw_alloc();
|
830
|
+
extern FrtIndexWriter *frt_iw_open(FrtIndexWriter *, FrtStore *store, FrtAnalyzer *analyzer, const FrtConfig *config);
|
831
|
+
extern void frt_iw_delete_term(FrtIndexWriter *iw, ID field, const char *term);
|
832
|
+
extern void frt_iw_delete_terms(FrtIndexWriter *iw, ID field, char **terms, const int term_cnt);
|
848
833
|
extern void frt_iw_close(FrtIndexWriter *iw);
|
849
834
|
extern void frt_iw_add_doc(FrtIndexWriter *iw, FrtDocument *doc);
|
850
835
|
extern int frt_iw_doc_count(FrtIndexWriter *iw);
|
@@ -859,17 +844,16 @@ extern void frt_iw_add_readers(FrtIndexWriter *iw, FrtIndexReader **readers, con
|
|
859
844
|
****************************************************************************/
|
860
845
|
|
861
846
|
#define FRT_CW_INIT_CAPA 16
|
862
|
-
typedef struct FrtCWFileEntry
|
863
|
-
|
864
|
-
char *name;
|
847
|
+
typedef struct FrtCWFileEntry {
|
848
|
+
char *name;
|
865
849
|
off_t dir_offset;
|
866
850
|
off_t data_offset;
|
867
851
|
} FrtCWFileEntry;
|
868
852
|
|
869
853
|
typedef struct FrtCompoundWriter {
|
870
|
-
FrtStore
|
871
|
-
const char
|
872
|
-
FrtHashSet
|
854
|
+
FrtStore *store;
|
855
|
+
const char *name;
|
856
|
+
FrtHashSet *ids;
|
873
857
|
FrtCWFileEntry *file_entries;
|
874
858
|
} FrtCompoundWriter;
|
875
859
|
|