isomorfeus-ferret 0.12.7 → 0.13.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +101 -19
- data/README.md +85 -13
- data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
- data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
- data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
- data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +1 -1
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +1 -1
- data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
- data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
- data/ext/isomorfeus_ferret_ext/bzlib_blocksort.c +1094 -0
- data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
- data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
- data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
- data/ext/isomorfeus_ferret_ext/bzlib_huffman.c +205 -0
- data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
- data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
- data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
- data/ext/isomorfeus_ferret_ext/frb_index.c +497 -495
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
- data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
- data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
- data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
- data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
- data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
- data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
- data/ext/isomorfeus_ferret_ext/frt_document.h +10 -10
- data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
- data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +91 -200
- data/ext/isomorfeus_ferret_ext/frt_global.h +7 -18
- data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
- data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_index.c +603 -410
- data/ext/isomorfeus_ferret_ext/frt_index.h +272 -291
- data/ext/isomorfeus_ferret_ext/frt_lang.c +0 -2
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +68 -91
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +45 -84
- data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
- data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
- data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
- data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
- data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +22 -112
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
- data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
- data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
- data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
- data/ext/isomorfeus_ferret_ext/test.c +0 -17
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
- data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
- data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
- data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_fields.c +59 -60
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
- data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
- data/ext/isomorfeus_ferret_ext/test_global.c +0 -46
- data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
- data/ext/isomorfeus_ferret_ext/test_index.c +372 -365
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
- data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -64
- data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
- data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
- data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
- data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +27 -57
- data/ext/isomorfeus_ferret_ext/email.rl +0 -21
- data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
- data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
- data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
- data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
- data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
- data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
- data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
- data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
- data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
- data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -13,6 +13,7 @@
|
|
13
13
|
#include "frt_priorityqueue.h"
|
14
14
|
|
15
15
|
typedef struct FrtIndexReader FrtIndexReader;
|
16
|
+
typedef struct FrtSegmentReader FrtSegmentReader;
|
16
17
|
typedef struct FrtMultiReader FrtMultiReader;
|
17
18
|
typedef struct FrtDeleter FrtDeleter;
|
18
19
|
|
@@ -22,8 +23,7 @@ typedef struct FrtDeleter FrtDeleter;
|
|
22
23
|
*
|
23
24
|
****************************************************************************/
|
24
25
|
|
25
|
-
typedef struct FrtConfig
|
26
|
-
{
|
26
|
+
typedef struct FrtConfig {
|
27
27
|
int chunk_size;
|
28
28
|
int max_buffer_memory;
|
29
29
|
int index_interval;
|
@@ -52,8 +52,7 @@ typedef struct FrtCacheObject {
|
|
52
52
|
void (*destroy)(void *p);
|
53
53
|
} FrtCacheObject;
|
54
54
|
|
55
|
-
extern FrtCacheObject *frt_co_create(FrtHash *ref_tab1,
|
56
|
-
FrtHash *ref_tab2,
|
55
|
+
extern FrtCacheObject *frt_co_create(FrtHash *ref_tab1, FrtHash *ref_tab2,
|
57
56
|
void *ref1, void *ref2, frt_free_ft destroy, void *obj);
|
58
57
|
extern FrtHash *frt_co_hash_create();
|
59
58
|
|
@@ -63,15 +62,12 @@ extern FrtHash *frt_co_hash_create();
|
|
63
62
|
*
|
64
63
|
****************************************************************************/
|
65
64
|
|
66
|
-
typedef enum
|
67
|
-
{
|
65
|
+
typedef enum {
|
68
66
|
FRT_STORE_NO = 0,
|
69
67
|
FRT_STORE_YES = 1,
|
70
|
-
FRT_STORE_COMPRESS = 2
|
71
68
|
} FrtStoreValue;
|
72
69
|
|
73
|
-
typedef enum
|
74
|
-
{
|
70
|
+
typedef enum {
|
75
71
|
FRT_INDEX_NO = 0,
|
76
72
|
FRT_INDEX_UNTOKENIZED = 1,
|
77
73
|
FRT_INDEX_YES = 3,
|
@@ -79,8 +75,7 @@ typedef enum
|
|
79
75
|
FRT_INDEX_YES_OMIT_NORMS = 7
|
80
76
|
} FrtIndexValue;
|
81
77
|
|
82
|
-
typedef enum
|
83
|
-
{
|
78
|
+
typedef enum {
|
84
79
|
FRT_TERM_VECTOR_NO = 0,
|
85
80
|
FRT_TERM_VECTOR_YES = 1,
|
86
81
|
FRT_TERM_VECTOR_WITH_POSITIONS = 3,
|
@@ -88,39 +83,44 @@ typedef enum
|
|
88
83
|
FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS = 7
|
89
84
|
} FrtTermVectorValue;
|
90
85
|
|
91
|
-
#define FRT_FI_IS_STORED_BM
|
92
|
-
#define FRT_FI_IS_COMPRESSED_BM
|
93
|
-
#define FRT_FI_IS_INDEXED_BM
|
94
|
-
#define FRT_FI_IS_TOKENIZED_BM
|
95
|
-
#define FRT_FI_OMIT_NORMS_BM
|
96
|
-
#define FRT_FI_STORE_TERM_VECTOR_BM
|
97
|
-
#define FRT_FI_STORE_POSITIONS_BM
|
98
|
-
#define FRT_FI_STORE_OFFSETS_BM
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
86
|
+
#define FRT_FI_IS_STORED_BM 0x001
|
87
|
+
#define FRT_FI_IS_COMPRESSED_BM 0x002
|
88
|
+
#define FRT_FI_IS_INDEXED_BM 0x004
|
89
|
+
#define FRT_FI_IS_TOKENIZED_BM 0x008
|
90
|
+
#define FRT_FI_OMIT_NORMS_BM 0x010
|
91
|
+
#define FRT_FI_STORE_TERM_VECTOR_BM 0x020
|
92
|
+
#define FRT_FI_STORE_POSITIONS_BM 0x040
|
93
|
+
#define FRT_FI_STORE_OFFSETS_BM 0x080
|
94
|
+
#define FRT_FI_COMPRESSION_BROTLI_BM 0x100
|
95
|
+
#define FRT_FI_COMPRESSION_BZ2_BM 0x200
|
96
|
+
#define FRT_FI_COMPRESSION_LZ4_BM 0x400
|
97
|
+
|
98
|
+
typedef struct FrtFieldInfo {
|
99
|
+
ID name;
|
100
|
+
float boost;
|
104
101
|
unsigned int bits;
|
105
|
-
int
|
106
|
-
int
|
102
|
+
int number;
|
103
|
+
int ref_cnt;
|
104
|
+
VALUE rfi;
|
107
105
|
} FrtFieldInfo;
|
108
106
|
|
109
|
-
extern FrtFieldInfo *
|
110
|
-
|
111
|
-
|
112
|
-
FrtTermVectorValue term_vector);
|
107
|
+
extern FrtFieldInfo *frt_fi_alloc();
|
108
|
+
extern FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
|
109
|
+
extern FrtFieldInfo *frt_fi_new(ID name, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
|
113
110
|
extern char *frt_fi_to_s(FrtFieldInfo *fi);
|
114
111
|
extern void frt_fi_deref(FrtFieldInfo *fi);
|
115
112
|
|
116
|
-
#define fi_is_stored(fi)
|
117
|
-
#define fi_is_compressed(fi)
|
118
|
-
#define
|
119
|
-
#define
|
120
|
-
#define
|
121
|
-
#define
|
122
|
-
#define
|
123
|
-
#define
|
113
|
+
#define fi_is_stored(fi) (((fi)->bits & FRT_FI_IS_STORED_BM) != 0)
|
114
|
+
#define fi_is_compressed(fi) (((fi)->bits & FRT_FI_IS_COMPRESSED_BM) != 0)
|
115
|
+
#define fi_is_compressed_brotli(fi) (((fi)->bits & FRT_FI_COMPRESSION_BROTLI_BM) != 0)
|
116
|
+
#define fi_is_compressed_bz2(fi) (((fi)->bits & FRT_FI_COMPRESSION_BZ2_BM) != 0)
|
117
|
+
#define fi_is_compressed_lz4(fi) (((fi)->bits & FRT_FI_COMPRESSION_LZ4_BM) != 0)
|
118
|
+
#define fi_is_indexed(fi) (((fi)->bits & FRT_FI_IS_INDEXED_BM) != 0)
|
119
|
+
#define fi_is_tokenized(fi) (((fi)->bits & FRT_FI_IS_TOKENIZED_BM) != 0)
|
120
|
+
#define fi_omit_norms(fi) (((fi)->bits & FRT_FI_OMIT_NORMS_BM) != 0)
|
121
|
+
#define fi_store_term_vector(fi) (((fi)->bits & FRT_FI_STORE_TERM_VECTOR_BM) != 0)
|
122
|
+
#define fi_store_positions(fi) (((fi)->bits & FRT_FI_STORE_POSITIONS_BM) != 0)
|
123
|
+
#define fi_store_offsets(fi) (((fi)->bits & FRT_FI_STORE_OFFSETS_BM) != 0)
|
124
124
|
#define fi_has_norms(fi)\
|
125
125
|
(((fi)->bits & (FRT_FI_OMIT_NORMS_BM|FRT_FI_IS_INDEXED_BM)) == FRT_FI_IS_INDEXED_BM)
|
126
126
|
|
@@ -132,25 +132,26 @@ extern void frt_fi_deref(FrtFieldInfo *fi);
|
|
132
132
|
|
133
133
|
#define FIELD_INFOS_INIT_CAPA 4
|
134
134
|
/* carry changes over to dummy_fis in test/test_segments.c */
|
135
|
-
typedef struct FrtFieldInfos
|
136
|
-
|
137
|
-
|
138
|
-
FrtIndexValue
|
135
|
+
typedef struct FrtFieldInfos {
|
136
|
+
FrtStoreValue store;
|
137
|
+
FrtCompressionType compression;
|
138
|
+
FrtIndexValue index;
|
139
139
|
FrtTermVectorValue term_vector;
|
140
|
-
int
|
141
|
-
int
|
142
|
-
FrtFieldInfo
|
143
|
-
FrtHash
|
144
|
-
int
|
140
|
+
int size;
|
141
|
+
int capa;
|
142
|
+
FrtFieldInfo **fields;
|
143
|
+
FrtHash *field_dict;
|
144
|
+
int ref_cnt;
|
145
|
+
VALUE rfis;
|
145
146
|
} FrtFieldInfos;
|
146
147
|
|
147
|
-
FrtFieldInfos *
|
148
|
-
|
148
|
+
FrtFieldInfos *frt_fis_alloc();
|
149
|
+
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
|
150
|
+
FrtFieldInfos *frt_fis_new(FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
|
149
151
|
extern FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi);
|
150
|
-
extern FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis,
|
151
|
-
extern int frt_fis_get_field_num(FrtFieldInfos *fis,
|
152
|
-
extern FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis,
|
153
|
-
FrtSymbol name);
|
152
|
+
extern FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name);
|
153
|
+
extern int frt_fis_get_field_num(FrtFieldInfos *fis, ID name);
|
154
|
+
extern FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name);
|
154
155
|
extern void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os);
|
155
156
|
extern FrtFieldInfos *frt_fis_read(FrtInStream *is);
|
156
157
|
extern char *frt_fis_to_s(FrtFieldInfos *fis);
|
@@ -165,8 +166,7 @@ extern void frt_fis_deref(FrtFieldInfos *fis);
|
|
165
166
|
#define FRT_SEGMENT_NAME_MAX_LENGTH 100
|
166
167
|
#define FRT_SEGMENTS_FILE_NAME "segments"
|
167
168
|
|
168
|
-
typedef struct FrtSegmentInfo
|
169
|
-
{
|
169
|
+
typedef struct FrtSegmentInfo {
|
170
170
|
int ref_cnt;
|
171
171
|
char *name;
|
172
172
|
FrtStore *store;
|
@@ -189,8 +189,7 @@ extern void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num);
|
|
189
189
|
*
|
190
190
|
****************************************************************************/
|
191
191
|
|
192
|
-
typedef struct FrtSegmentInfos
|
193
|
-
{
|
192
|
+
typedef struct FrtSegmentInfos {
|
194
193
|
FrtFieldInfos *fis;
|
195
194
|
frt_u64 counter;
|
196
195
|
frt_u64 version;
|
@@ -202,10 +201,7 @@ typedef struct FrtSegmentInfos
|
|
202
201
|
int capa;
|
203
202
|
} FrtSegmentInfos;
|
204
203
|
|
205
|
-
extern char *frt_fn_for_generation(char *buf,
|
206
|
-
const char *base,
|
207
|
-
const char *ext,
|
208
|
-
frt_i64 gen);
|
204
|
+
extern char *frt_fn_for_generation(char *buf, const char *base, const char *ext, frt_i64 gen);
|
209
205
|
|
210
206
|
extern FrtSegmentInfos *frt_sis_new(FrtFieldInfos *fis);
|
211
207
|
extern FrtSegmentInfo *frt_sis_new_segment(FrtSegmentInfos *sis, int dcnt, FrtStore *store);
|
@@ -226,9 +222,8 @@ extern void frt_sis_put(FrtSegmentInfos *sis, FILE *stream);
|
|
226
222
|
*
|
227
223
|
****************************************************************************/
|
228
224
|
|
229
|
-
typedef struct FrtTermInfo
|
230
|
-
|
231
|
-
int doc_freq;
|
225
|
+
typedef struct FrtTermInfo {
|
226
|
+
int doc_freq;
|
232
227
|
off_t frq_ptr;
|
233
228
|
off_t prx_ptr;
|
234
229
|
off_t skip_offset;
|
@@ -242,24 +237,21 @@ typedef struct FrtTermInfo
|
|
242
237
|
} while (0)
|
243
238
|
|
244
239
|
/****************************************************************************
|
245
|
-
*
|
246
240
|
* FrtTermEnum
|
247
|
-
*
|
248
241
|
****************************************************************************/
|
249
242
|
|
250
243
|
typedef struct FrtTermEnum FrtTermEnum;
|
251
244
|
|
252
|
-
struct FrtTermEnum
|
253
|
-
{
|
245
|
+
struct FrtTermEnum {
|
254
246
|
char curr_term[FRT_MAX_WORD_SIZE];
|
255
247
|
char prev_term[FRT_MAX_WORD_SIZE];
|
256
|
-
FrtTermInfo
|
248
|
+
FrtTermInfo curr_ti;
|
257
249
|
int curr_term_len;
|
258
250
|
int field_num;
|
259
251
|
FrtTermEnum *(*set_field)(FrtTermEnum *te, int field_num);
|
260
|
-
char
|
261
|
-
char
|
262
|
-
void
|
252
|
+
char *(*next)(FrtTermEnum *te);
|
253
|
+
char *(*skip_to)(FrtTermEnum *te, const char *term);
|
254
|
+
void (*close)(FrtTermEnum *te);
|
263
255
|
FrtTermEnum *(*clone)(FrtTermEnum *te);
|
264
256
|
};
|
265
257
|
|
@@ -267,59 +259,54 @@ char *frt_te_get_term(struct FrtTermEnum *te);
|
|
267
259
|
FrtTermInfo *frt_te_get_ti(struct FrtTermEnum *te);
|
268
260
|
|
269
261
|
/****************************************************************************
|
270
|
-
*
|
271
262
|
* FrtSegmentTermEnum
|
272
|
-
*
|
273
263
|
****************************************************************************/
|
274
264
|
|
275
|
-
/*
|
265
|
+
/* FrtSegmentTermIndex */
|
276
266
|
|
277
|
-
typedef struct FrtSegmentTermIndex
|
278
|
-
{
|
267
|
+
typedef struct FrtSegmentTermIndex {
|
279
268
|
off_t index_ptr;
|
280
269
|
off_t ptr;
|
281
270
|
int index_cnt;
|
282
271
|
int size;
|
283
|
-
char
|
284
|
-
int
|
285
|
-
FrtTermInfo
|
286
|
-
off_t
|
272
|
+
char **index_terms;
|
273
|
+
int *index_term_lens;
|
274
|
+
FrtTermInfo *index_term_infos;
|
275
|
+
off_t *index_ptrs;
|
287
276
|
} FrtSegmentTermIndex;
|
288
277
|
|
289
|
-
/*
|
278
|
+
/* FrtSegmentFieldIndex */
|
290
279
|
|
291
|
-
typedef struct
|
292
|
-
|
293
|
-
typedef struct FrtSegmentFieldIndex
|
294
|
-
{
|
295
|
-
frt_mutex_t mutex;
|
280
|
+
typedef struct FrtSegmentFieldIndex {
|
281
|
+
frt_mutex_t mutex;
|
296
282
|
int skip_interval;
|
297
283
|
int index_interval;
|
298
284
|
off_t index_ptr;
|
299
|
-
FrtTermEnum
|
300
|
-
FrtHash
|
285
|
+
FrtTermEnum *index_te;
|
286
|
+
FrtHash *field_dict;
|
301
287
|
} FrtSegmentFieldIndex;
|
302
288
|
|
303
|
-
|
304
|
-
extern void frt_sfi_close(FrtSegmentFieldIndex *sfi);
|
289
|
+
/* FrtSegmentTermEnum */
|
305
290
|
|
291
|
+
typedef struct FrtSegmentTermEnum FrtSegmentTermEnum;
|
306
292
|
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
FrtTermEnum te;
|
311
|
-
FrtInStream *is;
|
293
|
+
struct FrtSegmentTermEnum {
|
294
|
+
FrtTermEnum te;
|
295
|
+
FrtInStream *is;
|
312
296
|
int size;
|
313
297
|
int pos;
|
314
298
|
int skip_interval;
|
315
299
|
FrtSegmentFieldIndex *sfi;
|
316
300
|
};
|
317
301
|
|
302
|
+
extern FrtSegmentFieldIndex *frt_sfi_open(FrtStore *store, const char *segment);
|
303
|
+
extern void frt_sfi_close(FrtSegmentFieldIndex *sfi);
|
304
|
+
|
318
305
|
extern void frt_ste_close(FrtTermEnum *te);
|
319
306
|
extern FrtTermEnum *frt_ste_clone(FrtTermEnum *te);
|
320
307
|
extern FrtTermEnum *frt_ste_new(FrtInStream *is, FrtSegmentFieldIndex *sfi);
|
321
308
|
|
322
|
-
/*
|
309
|
+
/* MultiTermEnum */
|
323
310
|
|
324
311
|
extern FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *term);
|
325
312
|
|
@@ -329,17 +316,14 @@ extern FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *t
|
|
329
316
|
*
|
330
317
|
****************************************************************************/
|
331
318
|
|
332
|
-
typedef struct FrtTermInfosReader
|
333
|
-
{
|
319
|
+
typedef struct FrtTermInfosReader {
|
334
320
|
frt_thread_key_t thread_te;
|
335
|
-
void
|
336
|
-
FrtTermEnum
|
337
|
-
int
|
321
|
+
void **te_bucket;
|
322
|
+
FrtTermEnum *orig_te;
|
323
|
+
int field_num;
|
338
324
|
} FrtTermInfosReader;
|
339
325
|
|
340
|
-
extern FrtTermInfosReader *frt_tir_open(FrtStore *store,
|
341
|
-
FrtSegmentFieldIndex *sfi,
|
342
|
-
const char *segment);
|
326
|
+
extern FrtTermInfosReader *frt_tir_open(FrtStore *store, FrtSegmentFieldIndex *sfi, const char *segment);
|
343
327
|
extern FrtTermInfosReader *frt_tir_set_field(FrtTermInfosReader *tir, int field_num);
|
344
328
|
extern FrtTermInfo *frt_tir_get_ti(FrtTermInfosReader *tir, const char *term);
|
345
329
|
extern char *frt_tir_get_term(FrtTermInfosReader *tir, int pos);
|
@@ -354,34 +338,26 @@ extern void frt_tir_close(FrtTermInfosReader *tir);
|
|
354
338
|
#define FRT_INDEX_INTERVAL 128
|
355
339
|
#define FRT_SKIP_INTERVAL 16
|
356
340
|
|
357
|
-
typedef struct FrtTermWriter
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
FrtTermInfo last_term_info;
|
341
|
+
typedef struct FrtTermWriter {
|
342
|
+
int counter;
|
343
|
+
const char *last_term;
|
344
|
+
FrtTermInfo last_term_info;
|
362
345
|
FrtOutStream *os;
|
363
346
|
} FrtTermWriter;
|
364
347
|
|
365
|
-
typedef struct FrtTermInfosWriter
|
366
|
-
|
367
|
-
int
|
368
|
-
int
|
369
|
-
|
370
|
-
|
371
|
-
FrtOutStream *tfx_out;
|
348
|
+
typedef struct FrtTermInfosWriter {
|
349
|
+
int field_count;
|
350
|
+
int index_interval;
|
351
|
+
int skip_interval;
|
352
|
+
off_t last_index_ptr;
|
353
|
+
FrtOutStream *tfx_out;
|
372
354
|
FrtTermWriter *tix_writer;
|
373
355
|
FrtTermWriter *tis_writer;
|
374
356
|
} FrtTermInfosWriter;
|
375
357
|
|
376
|
-
extern FrtTermInfosWriter *frt_tiw_open(FrtStore *store,
|
377
|
-
const char *segment,
|
378
|
-
int index_interval,
|
379
|
-
int skip_interval);
|
358
|
+
extern FrtTermInfosWriter *frt_tiw_open(FrtStore *store, const char *segment, int index_interval, int skip_interval);
|
380
359
|
extern void frt_tiw_start_field(FrtTermInfosWriter *tiw, int field_num);
|
381
|
-
extern void frt_tiw_add(FrtTermInfosWriter *tiw,
|
382
|
-
const char *term,
|
383
|
-
int t_len,
|
384
|
-
FrtTermInfo *ti);
|
360
|
+
extern void frt_tiw_add(FrtTermInfosWriter *tiw, const char *term, int t_len, FrtTermInfo *ti);
|
385
361
|
extern void frt_tiw_close(FrtTermInfosWriter *tiw);
|
386
362
|
|
387
363
|
/****************************************************************************
|
@@ -391,8 +367,7 @@ extern void frt_tiw_close(FrtTermInfosWriter *tiw);
|
|
391
367
|
****************************************************************************/
|
392
368
|
|
393
369
|
typedef struct FrtTermDocEnum FrtTermDocEnum;
|
394
|
-
struct FrtTermDocEnum
|
395
|
-
{
|
370
|
+
struct FrtTermDocEnum {
|
396
371
|
void (*seek)(FrtTermDocEnum *tde, int field_num, const char *term);
|
397
372
|
void (*seek_te)(FrtTermDocEnum *tde, FrtTermEnum *te);
|
398
373
|
void (*seek_ti)(FrtTermDocEnum *tde, FrtTermInfo *ti);
|
@@ -408,8 +383,7 @@ struct FrtTermDocEnum
|
|
408
383
|
/* * FrtSegmentTermDocEnum * */
|
409
384
|
|
410
385
|
typedef struct FrtSegmentTermDocEnum FrtSegmentTermDocEnum;
|
411
|
-
struct FrtSegmentTermDocEnum
|
412
|
-
{
|
386
|
+
struct FrtSegmentTermDocEnum {
|
413
387
|
FrtTermDocEnum tde;
|
414
388
|
void (*seek_prox)(FrtSegmentTermDocEnum *stde, off_t prx_ptr);
|
415
389
|
void (*skip_prox)(FrtSegmentTermDocEnum *stde);
|
@@ -446,8 +420,7 @@ extern FrtTermDocEnum *frt_stpe_new(FrtTermInfosReader *tir, FrtInStream *frq_in
|
|
446
420
|
* MultipleTermDocPosEnum
|
447
421
|
****************************************************************************/
|
448
422
|
|
449
|
-
extern FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms,
|
450
|
-
int t_cnt);
|
423
|
+
extern FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms, int t_cnt);
|
451
424
|
|
452
425
|
/****************************************************************************
|
453
426
|
*
|
@@ -455,8 +428,7 @@ extern FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **t
|
|
455
428
|
*
|
456
429
|
****************************************************************************/
|
457
430
|
|
458
|
-
typedef struct FrtOffset
|
459
|
-
{
|
431
|
+
typedef struct FrtOffset {
|
460
432
|
off_t start;
|
461
433
|
off_t end;
|
462
434
|
} FrtOffset;
|
@@ -467,8 +439,7 @@ typedef struct FrtOffset
|
|
467
439
|
*
|
468
440
|
****************************************************************************/
|
469
441
|
|
470
|
-
typedef struct FrtOccurence
|
471
|
-
{
|
442
|
+
typedef struct FrtOccurence {
|
472
443
|
struct FrtOccurence *next;
|
473
444
|
int pos;
|
474
445
|
} FrtOccurence;
|
@@ -479,8 +450,7 @@ typedef struct FrtOccurence
|
|
479
450
|
*
|
480
451
|
****************************************************************************/
|
481
452
|
|
482
|
-
typedef struct FrtPosting
|
483
|
-
{
|
453
|
+
typedef struct FrtPosting {
|
484
454
|
int freq;
|
485
455
|
int doc_num;
|
486
456
|
FrtOccurence *first_occ;
|
@@ -495,17 +465,15 @@ extern FrtPosting *frt_p_new(FrtMemoryPool *mp, int doc_num, int pos);
|
|
495
465
|
*
|
496
466
|
****************************************************************************/
|
497
467
|
|
498
|
-
typedef struct FrtPostingList
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
FrtPosting
|
503
|
-
FrtPosting *last;
|
468
|
+
typedef struct FrtPostingList {
|
469
|
+
const char *term;
|
470
|
+
int term_len;
|
471
|
+
FrtPosting *first;
|
472
|
+
FrtPosting *last;
|
504
473
|
FrtOccurence *last_occ;
|
505
474
|
} FrtPostingList;
|
506
475
|
|
507
|
-
extern FrtPostingList *frt_pl_new(FrtMemoryPool *mp, const char *term,
|
508
|
-
int term_len, FrtPosting *p);
|
476
|
+
extern FrtPostingList *frt_pl_new(FrtMemoryPool *mp, const char *term, int term_len, FrtPosting *p);
|
509
477
|
extern void frt_pl_add_occ(FrtMemoryPool *mp, FrtPostingList *pl, int pos);
|
510
478
|
extern int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2);
|
511
479
|
|
@@ -515,8 +483,7 @@ extern int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2);
|
|
515
483
|
*
|
516
484
|
****************************************************************************/
|
517
485
|
|
518
|
-
typedef struct FrtTVField
|
519
|
-
{
|
486
|
+
typedef struct FrtTVField {
|
520
487
|
int field_num;
|
521
488
|
int size;
|
522
489
|
} FrtTVField;
|
@@ -527,11 +494,10 @@ typedef struct FrtTVField
|
|
527
494
|
*
|
528
495
|
****************************************************************************/
|
529
496
|
|
530
|
-
typedef struct FrtTVTerm
|
531
|
-
|
532
|
-
|
533
|
-
int
|
534
|
-
int *positions;
|
497
|
+
typedef struct FrtTVTerm {
|
498
|
+
char *text;
|
499
|
+
int freq;
|
500
|
+
int *positions;
|
535
501
|
} FrtTVTerm;
|
536
502
|
|
537
503
|
/****************************************************************************
|
@@ -541,10 +507,9 @@ typedef struct FrtTVTerm
|
|
541
507
|
****************************************************************************/
|
542
508
|
|
543
509
|
#define FRT_TV_FIELD_INIT_CAPA 8
|
544
|
-
typedef struct FrtTermVector
|
545
|
-
{
|
510
|
+
typedef struct FrtTermVector {
|
546
511
|
int field_num;
|
547
|
-
|
512
|
+
ID field;
|
548
513
|
int term_cnt;
|
549
514
|
FrtTVTerm *terms;
|
550
515
|
int offset_cnt;
|
@@ -563,38 +528,38 @@ extern FrtTVTerm *frt_tv_get_tv_term(FrtTermVector *tv, const char *term);
|
|
563
528
|
****************************************************************************/
|
564
529
|
|
565
530
|
/* * * FrtLazyDocField * * */
|
566
|
-
typedef struct FrtLazyDocFieldData
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
531
|
+
typedef struct FrtLazyDocFieldData {
|
532
|
+
off_t start;
|
533
|
+
int length;
|
534
|
+
rb_encoding *encoding;
|
535
|
+
FrtCompressionType compression; /* as stored */
|
536
|
+
char *text;
|
571
537
|
} FrtLazyDocFieldData;
|
572
538
|
|
573
539
|
typedef struct FrtLazyDoc FrtLazyDoc;
|
574
|
-
typedef struct FrtLazyDocField
|
575
|
-
|
576
|
-
FrtSymbol name;
|
540
|
+
typedef struct FrtLazyDocField {
|
541
|
+
ID name;
|
577
542
|
FrtLazyDocFieldData *data;
|
578
543
|
FrtLazyDoc *doc;
|
579
544
|
int size; /* number of data elements */
|
580
545
|
int len; /* length of data elements concatenated */
|
581
|
-
|
546
|
+
FrtCompressionType compression; /* as configured */
|
547
|
+
bool decompressed;
|
582
548
|
} FrtLazyDocField;
|
583
549
|
|
584
550
|
extern char *frt_lazy_df_get_data(FrtLazyDocField *self, int i);
|
585
551
|
extern void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf, int start, int len);
|
586
552
|
|
587
553
|
/* * * FrtLazyDoc * * */
|
588
|
-
struct FrtLazyDoc
|
589
|
-
|
590
|
-
|
591
|
-
int size;
|
554
|
+
struct FrtLazyDoc {
|
555
|
+
FrtHash *field_dictionary;
|
556
|
+
int size;
|
592
557
|
FrtLazyDocField **fields;
|
593
|
-
FrtInStream
|
558
|
+
FrtInStream *fields_in;
|
594
559
|
};
|
595
560
|
|
596
561
|
extern void frt_lazy_doc_close(FrtLazyDoc *self);
|
597
|
-
extern FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self,
|
562
|
+
extern FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self, ID field);
|
598
563
|
|
599
564
|
/****************************************************************************
|
600
565
|
*
|
@@ -602,8 +567,7 @@ extern FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self, FrtSymbol field);
|
|
602
567
|
*
|
603
568
|
****************************************************************************/
|
604
569
|
|
605
|
-
typedef struct FrtFieldsReader
|
606
|
-
{
|
570
|
+
typedef struct FrtFieldsReader {
|
607
571
|
int size;
|
608
572
|
FrtFieldInfos *fis;
|
609
573
|
FrtStore *store;
|
@@ -611,15 +575,13 @@ typedef struct FrtFieldsReader
|
|
611
575
|
FrtInStream *fdt_in;
|
612
576
|
} FrtFieldsReader;
|
613
577
|
|
614
|
-
extern FrtFieldsReader *frt_fr_open(FrtStore *store,
|
615
|
-
const char *segment, FrtFieldInfos *fis);
|
578
|
+
extern FrtFieldsReader *frt_fr_open(FrtStore *store, const char *segment, FrtFieldInfos *fis);
|
616
579
|
extern FrtFieldsReader *frt_fr_clone(FrtFieldsReader *orig);
|
617
580
|
extern void frt_fr_close(FrtFieldsReader *fr);
|
618
581
|
extern FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num);
|
619
582
|
extern FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num);
|
620
583
|
extern FrtHash *frt_fr_get_tv(FrtFieldsReader *fr, int doc_num);
|
621
|
-
extern FrtTermVector *frt_fr_get_field_tv(FrtFieldsReader *fr, int doc_num,
|
622
|
-
int field_num);
|
584
|
+
extern FrtTermVector *frt_fr_get_field_tv(FrtFieldsReader *fr, int doc_num, int field_num);
|
623
585
|
|
624
586
|
/****************************************************************************
|
625
587
|
*
|
@@ -627,18 +589,16 @@ extern FrtTermVector *frt_fr_get_field_tv(FrtFieldsReader *fr, int doc_num,
|
|
627
589
|
*
|
628
590
|
****************************************************************************/
|
629
591
|
|
630
|
-
typedef struct FrtFieldsWriter
|
631
|
-
{
|
592
|
+
typedef struct FrtFieldsWriter {
|
632
593
|
FrtFieldInfos *fis;
|
633
594
|
FrtOutStream *fdt_out;
|
634
595
|
FrtOutStream *fdx_out;
|
635
596
|
FrtOutStream *buffer;
|
636
597
|
FrtTVField *tv_fields;
|
637
|
-
off_t start_ptr;
|
598
|
+
off_t start_ptr;
|
638
599
|
} FrtFieldsWriter;
|
639
600
|
|
640
|
-
extern FrtFieldsWriter *frt_fw_open(FrtStore *store,
|
641
|
-
const char *segment, FrtFieldInfos *fis);
|
601
|
+
extern FrtFieldsWriter *frt_fw_open(FrtStore *store, const char *segment, FrtFieldInfos *fis);
|
642
602
|
extern void frt_fw_close(FrtFieldsWriter *fw);
|
643
603
|
extern void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc);
|
644
604
|
extern void frt_fw_add_postings(FrtFieldsWriter *fw,
|
@@ -659,11 +619,10 @@ extern void frt_fw_write_tv_index(FrtFieldsWriter *fw);
|
|
659
619
|
*
|
660
620
|
****************************************************************************/
|
661
621
|
|
662
|
-
struct FrtDeleter
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
FrtHashSet *pending;
|
622
|
+
struct FrtDeleter {
|
623
|
+
FrtStore *store;
|
624
|
+
FrtSegmentInfos *sis;
|
625
|
+
FrtHashSet *pending;
|
667
626
|
};
|
668
627
|
|
669
628
|
extern FrtDeleter *frt_deleter_new(FrtSegmentInfos *sis, FrtStore *store);
|
@@ -681,88 +640,115 @@ extern void frt_deleter_delete_files(FrtDeleter *dlr, char **files, int file_cnt
|
|
681
640
|
#define FRT_WRITE_LOCK_NAME "write"
|
682
641
|
#define FRT_COMMIT_LOCK_NAME "commit"
|
683
642
|
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
void
|
709
|
-
void
|
710
|
-
void
|
711
|
-
|
712
|
-
|
713
|
-
void
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
FrtHash
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
bool
|
727
|
-
bool
|
643
|
+
typedef enum {
|
644
|
+
FRT_INDEX_READER,
|
645
|
+
FRT_SEGMENT_READER,
|
646
|
+
FRT_MULTI_READER
|
647
|
+
} frt_index_reader_t;
|
648
|
+
|
649
|
+
struct FrtIndexReader {
|
650
|
+
int type;
|
651
|
+
int (*num_docs)(FrtIndexReader *ir);
|
652
|
+
int (*max_doc)(FrtIndexReader *ir);
|
653
|
+
FrtDocument *(*get_doc)(FrtIndexReader *ir, int doc_num);
|
654
|
+
FrtLazyDoc *(*get_lazy_doc)(FrtIndexReader *ir, int doc_num);
|
655
|
+
frt_uchar *(*get_norms)(FrtIndexReader *ir, int field_num);
|
656
|
+
frt_uchar *(*get_norms_into)(FrtIndexReader *ir, int field_num, frt_uchar *buf);
|
657
|
+
FrtTermEnum *(*terms)(FrtIndexReader *ir, int field_num);
|
658
|
+
FrtTermEnum *(*terms_from)(FrtIndexReader *ir, int field_num, const char *term);
|
659
|
+
int (*doc_freq)(FrtIndexReader *ir, int field_num, const char *term);
|
660
|
+
FrtTermDocEnum *(*term_docs)(FrtIndexReader *ir);
|
661
|
+
FrtTermDocEnum *(*term_positions)(FrtIndexReader *ir);
|
662
|
+
FrtTermVector *(*term_vector)(FrtIndexReader *ir, int doc_num, ID field);
|
663
|
+
FrtHash *(*term_vectors)(FrtIndexReader *ir, int doc_num);
|
664
|
+
bool (*is_deleted)(FrtIndexReader *ir, int doc_num);
|
665
|
+
bool (*has_deletions)(FrtIndexReader *ir);
|
666
|
+
void (*acquire_write_lock)(FrtIndexReader *ir);
|
667
|
+
void (*set_norm_i)(FrtIndexReader *ir, int doc_num, int field_num, frt_uchar val);
|
668
|
+
void (*delete_doc_i)(FrtIndexReader *ir, int doc_num);
|
669
|
+
void (*undelete_all_i)(FrtIndexReader *ir);
|
670
|
+
void (*set_deleter_i)(FrtIndexReader *ir, FrtDeleter *dlr);
|
671
|
+
bool (*is_latest_i)(FrtIndexReader *ir);
|
672
|
+
void (*commit_i)(FrtIndexReader *ir);
|
673
|
+
void (*close_i)(FrtIndexReader *ir);
|
674
|
+
int ref_cnt;
|
675
|
+
FrtDeleter *deleter;
|
676
|
+
FrtStore *store;
|
677
|
+
FrtLock *write_lock;
|
678
|
+
FrtSegmentInfos *sis;
|
679
|
+
FrtFieldInfos *fis;
|
680
|
+
FrtHash *cache;
|
681
|
+
FrtHash *field_index_cache;
|
682
|
+
frt_mutex_t field_index_mutex;
|
683
|
+
frt_uchar *fake_norms;
|
684
|
+
frt_mutex_t mutex;
|
685
|
+
bool has_changes : 1;
|
686
|
+
bool is_stale : 1;
|
687
|
+
bool is_owner : 1;
|
688
|
+
VALUE rir;
|
728
689
|
};
|
729
690
|
|
730
|
-
extern FrtIndexReader *frt_ir_open(FrtStore *store);
|
691
|
+
extern FrtIndexReader *frt_ir_open(FrtIndexReader *ir, FrtStore *store);
|
731
692
|
extern void frt_ir_close(FrtIndexReader *ir);
|
732
693
|
extern void frt_ir_commit(FrtIndexReader *ir);
|
733
694
|
extern void frt_ir_delete_doc(FrtIndexReader *ir, int doc_num);
|
734
695
|
extern void frt_ir_undelete_all(FrtIndexReader *ir);
|
735
|
-
extern int frt_ir_doc_freq(FrtIndexReader *ir, FrtSymbol field, const char *term);
|
736
|
-
extern void frt_ir_set_norm(FrtIndexReader *ir, int doc_num, FrtSymbol field, frt_uchar val);
|
696
|
+
extern int frt_ir_doc_freq(FrtIndexReader *ir, ID field, const char *term);
|
697
|
+
extern void frt_ir_set_norm(FrtIndexReader *ir, int doc_num, ID field, frt_uchar val);
|
737
698
|
extern frt_uchar *frt_ir_get_norms_i(FrtIndexReader *ir, int field_num);
|
738
|
-
extern frt_uchar *frt_ir_get_norms(FrtIndexReader *ir, FrtSymbol field);
|
739
|
-
extern frt_uchar *frt_ir_get_norms_into(FrtIndexReader *ir, FrtSymbol field, frt_uchar *buf);
|
740
|
-
extern FrtDocument *frt_ir_get_doc_with_term(FrtIndexReader *ir, FrtSymbol field, const char *term);
|
741
|
-
extern FrtTermEnum *frt_ir_terms(FrtIndexReader *ir, FrtSymbol field);
|
742
|
-
extern FrtTermEnum *frt_ir_terms_from(FrtIndexReader *ir, FrtSymbol field, const char *t);
|
743
|
-
extern FrtTermDocEnum *ir_term_docs_for(FrtIndexReader *ir, FrtSymbol field, const char *term);
|
744
|
-
extern FrtTermDocEnum *frt_ir_term_positions_for(FrtIndexReader *ir, FrtSymbol field, const char *t);
|
699
|
+
extern frt_uchar *frt_ir_get_norms(FrtIndexReader *ir, ID field);
|
700
|
+
extern frt_uchar *frt_ir_get_norms_into(FrtIndexReader *ir, ID field, frt_uchar *buf);
|
701
|
+
extern FrtDocument *frt_ir_get_doc_with_term(FrtIndexReader *ir, ID field, const char *term);
|
702
|
+
extern FrtTermEnum *frt_ir_terms(FrtIndexReader *ir, ID field);
|
703
|
+
extern FrtTermEnum *frt_ir_terms_from(FrtIndexReader *ir, ID field, const char *t);
|
704
|
+
extern FrtTermDocEnum *ir_term_docs_for(FrtIndexReader *ir, ID field, const char *term);
|
705
|
+
extern FrtTermDocEnum *frt_ir_term_positions_for(FrtIndexReader *ir, ID field, const char *t);
|
745
706
|
extern void frt_ir_add_cache(FrtIndexReader *ir);
|
746
707
|
extern bool frt_ir_is_latest(FrtIndexReader *ir);
|
747
708
|
|
709
|
+
/****************************************************************************
|
710
|
+
* FrtSegmentReader
|
711
|
+
****************************************************************************/
|
712
|
+
|
713
|
+
struct FrtSegmentReader {
|
714
|
+
FrtIndexReader ir;
|
715
|
+
FrtSegmentInfo *si;
|
716
|
+
char *segment;
|
717
|
+
FrtFieldsReader *fr;
|
718
|
+
FrtBitVector *deleted_docs;
|
719
|
+
FrtInStream *frq_in;
|
720
|
+
FrtInStream *prx_in;
|
721
|
+
FrtSegmentFieldIndex *sfi;
|
722
|
+
FrtTermInfosReader *tir;
|
723
|
+
frt_thread_key_t thread_fr;
|
724
|
+
void **fr_bucket;
|
725
|
+
FrtHash *norms;
|
726
|
+
FrtStore *cfs_store;
|
727
|
+
bool deleted_docs_dirty : 1;
|
728
|
+
bool undelete_all : 1;
|
729
|
+
bool norms_dirty : 1;
|
730
|
+
};
|
731
|
+
|
732
|
+
extern FrtSegmentReader *frt_sr_alloc();
|
733
|
+
|
748
734
|
/****************************************************************************
|
749
735
|
* FrtMultiReader
|
750
736
|
****************************************************************************/
|
751
737
|
|
752
738
|
struct FrtMultiReader {
|
753
739
|
FrtIndexReader ir;
|
754
|
-
int
|
755
|
-
int
|
756
|
-
int
|
757
|
-
int
|
740
|
+
int max_doc;
|
741
|
+
int num_docs_cache;
|
742
|
+
int r_cnt;
|
743
|
+
int *starts;
|
758
744
|
FrtIndexReader **sub_readers;
|
759
|
-
FrtHash
|
760
|
-
bool
|
761
|
-
int
|
745
|
+
FrtHash *norms_cache;
|
746
|
+
bool has_deletions : 1;
|
747
|
+
int **field_num_map;
|
762
748
|
};
|
763
749
|
|
764
750
|
extern int frt_mr_get_field_num(FrtMultiReader *mr, int ir_num, int f_num);
|
765
|
-
extern FrtIndexReader *frt_mr_open(FrtIndexReader **sub_readers, const int r_cnt);
|
751
|
+
extern FrtIndexReader *frt_mr_open(FrtIndexReader *ir, FrtIndexReader **sub_readers, const int r_cnt);
|
766
752
|
|
767
753
|
/****************************************************************************
|
768
754
|
*
|
@@ -770,16 +756,15 @@ extern FrtIndexReader *frt_mr_open(FrtIndexReader **sub_readers, const int r_cnt
|
|
770
756
|
*
|
771
757
|
****************************************************************************/
|
772
758
|
|
773
|
-
typedef struct FrtFieldInverter
|
774
|
-
|
775
|
-
|
776
|
-
frt_uchar *norms;
|
759
|
+
typedef struct FrtFieldInverter {
|
760
|
+
FrtHash *plists;
|
761
|
+
frt_uchar *norms;
|
777
762
|
FrtFieldInfo *fi;
|
778
|
-
int
|
779
|
-
bool
|
780
|
-
bool
|
781
|
-
bool
|
782
|
-
bool
|
763
|
+
int length;
|
764
|
+
bool is_tokenized : 1;
|
765
|
+
bool store_term_vector : 1;
|
766
|
+
bool store_offsets : 1;
|
767
|
+
bool has_norms : 1;
|
783
768
|
} FrtFieldInverter;
|
784
769
|
|
785
770
|
/****************************************************************************
|
@@ -791,18 +776,17 @@ typedef struct FrtFieldInverter
|
|
791
776
|
#define DW_OFFSET_INIT_CAPA 512
|
792
777
|
typedef struct FrtIndexWriter FrtIndexWriter;
|
793
778
|
|
794
|
-
typedef struct FrtDocWriter
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
FrtFieldInfos *fis;
|
779
|
+
typedef struct FrtDocWriter {
|
780
|
+
FrtStore *store;
|
781
|
+
FrtSegmentInfo *si;
|
782
|
+
FrtFieldInfos *fis;
|
799
783
|
FrtFieldsWriter *fw;
|
800
|
-
FrtMemoryPool
|
801
|
-
FrtAnalyzer
|
802
|
-
FrtHash
|
803
|
-
FrtHash
|
804
|
-
FrtSimilarity
|
805
|
-
FrtOffset
|
784
|
+
FrtMemoryPool *mp;
|
785
|
+
FrtAnalyzer *analyzer;
|
786
|
+
FrtHash *curr_plists;
|
787
|
+
FrtHash *fields;
|
788
|
+
FrtSimilarity *similarity;
|
789
|
+
FrtOffset *offsets;
|
806
790
|
int offsets_size;
|
807
791
|
int offsets_capa;
|
808
792
|
int doc_num;
|
@@ -817,9 +801,7 @@ extern void frt_dw_close(FrtDocWriter *dw);
|
|
817
801
|
extern void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc);
|
818
802
|
extern void frt_dw_new_segment(FrtDocWriter *dw, FrtSegmentInfo *si);
|
819
803
|
/* For testing. need to remove somehow. FIXME */
|
820
|
-
extern FrtHash *frt_dw_invert_field(FrtDocWriter *dw,
|
821
|
-
FrtFieldInverter *fld_inv,
|
822
|
-
FrtDocField *df);
|
804
|
+
extern FrtHash *frt_dw_invert_field(FrtDocWriter *dw, FrtFieldInverter *fld_inv, FrtDocField *df);
|
823
805
|
extern FrtFieldInverter *frt_dw_get_fld_inv(FrtDocWriter *dw, FrtFieldInfo *fi);
|
824
806
|
extern void frt_dw_reset_postings(FrtHash *postings);
|
825
807
|
|
@@ -829,25 +811,25 @@ extern void frt_dw_reset_postings(FrtHash *postings);
|
|
829
811
|
*
|
830
812
|
****************************************************************************/
|
831
813
|
|
832
|
-
struct FrtIndexWriter
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
FrtAnalyzer *analyzer;
|
814
|
+
struct FrtIndexWriter {
|
815
|
+
FrtConfig config;
|
816
|
+
frt_mutex_t mutex;
|
817
|
+
FrtStore *store;
|
818
|
+
FrtAnalyzer *analyzer;
|
838
819
|
FrtSegmentInfos *sis;
|
839
|
-
FrtFieldInfos
|
840
|
-
FrtDocWriter
|
841
|
-
FrtSimilarity
|
842
|
-
FrtLock
|
843
|
-
FrtDeleter
|
820
|
+
FrtFieldInfos *fis;
|
821
|
+
FrtDocWriter *dw;
|
822
|
+
FrtSimilarity *similarity;
|
823
|
+
FrtLock *write_lock;
|
824
|
+
FrtDeleter *deleter;
|
844
825
|
};
|
845
826
|
|
846
827
|
extern void frt_index_create(FrtStore *store, FrtFieldInfos *fis);
|
847
828
|
extern bool frt_index_is_locked(FrtStore *store);
|
848
|
-
extern FrtIndexWriter *
|
849
|
-
extern
|
850
|
-
extern void
|
829
|
+
extern FrtIndexWriter *frt_iw_alloc();
|
830
|
+
extern FrtIndexWriter *frt_iw_open(FrtIndexWriter *, FrtStore *store, FrtAnalyzer *analyzer, const FrtConfig *config);
|
831
|
+
extern void frt_iw_delete_term(FrtIndexWriter *iw, ID field, const char *term);
|
832
|
+
extern void frt_iw_delete_terms(FrtIndexWriter *iw, ID field, char **terms, const int term_cnt);
|
851
833
|
extern void frt_iw_close(FrtIndexWriter *iw);
|
852
834
|
extern void frt_iw_add_doc(FrtIndexWriter *iw, FrtDocument *doc);
|
853
835
|
extern int frt_iw_doc_count(FrtIndexWriter *iw);
|
@@ -862,17 +844,16 @@ extern void frt_iw_add_readers(FrtIndexWriter *iw, FrtIndexReader **readers, con
|
|
862
844
|
****************************************************************************/
|
863
845
|
|
864
846
|
#define FRT_CW_INIT_CAPA 16
|
865
|
-
typedef struct FrtCWFileEntry
|
866
|
-
|
867
|
-
char *name;
|
847
|
+
typedef struct FrtCWFileEntry {
|
848
|
+
char *name;
|
868
849
|
off_t dir_offset;
|
869
850
|
off_t data_offset;
|
870
851
|
} FrtCWFileEntry;
|
871
852
|
|
872
853
|
typedef struct FrtCompoundWriter {
|
873
|
-
FrtStore
|
874
|
-
const char
|
875
|
-
FrtHashSet
|
854
|
+
FrtStore *store;
|
855
|
+
const char *name;
|
856
|
+
FrtHashSet *ids;
|
876
857
|
FrtCWFileEntry *file_entries;
|
877
858
|
} FrtCompoundWriter;
|
878
859
|
|