isomorfeus-ferret 0.17.1 → 0.17.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/benchmark.c +9 -20
- data/ext/isomorfeus_ferret_ext/benchmarks_all.h +1 -2
- data/ext/isomorfeus_ferret_ext/bm_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/bm_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +4 -2
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +3 -2
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +4 -5
- data/ext/isomorfeus_ferret_ext/frb_field_info.c +3 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +118 -160
- data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +14 -16
- data/ext/isomorfeus_ferret_ext/frb_search.c +31 -23
- data/ext/isomorfeus_ferret_ext/frb_store.c +27 -13
- data/ext/isomorfeus_ferret_ext/frb_utils.c +3 -6
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +39 -46
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_array.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +3 -6
- data/ext/isomorfeus_ferret_ext/frt_doc_field.c +87 -0
- data/ext/isomorfeus_ferret_ext/frt_doc_field.h +26 -0
- data/ext/isomorfeus_ferret_ext/frt_document.c +4 -97
- data/ext/isomorfeus_ferret_ext/frt_document.h +2 -27
- data/ext/isomorfeus_ferret_ext/frt_except.c +50 -6
- data/ext/isomorfeus_ferret_ext/frt_except.h +3 -2
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +13 -32
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +0 -6
- data/ext/isomorfeus_ferret_ext/frt_field_info.c +69 -0
- data/ext/isomorfeus_ferret_ext/frt_field_info.h +49 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.c +196 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.h +35 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +10 -4
- data/ext/isomorfeus_ferret_ext/frt_global.h +11 -15
- data/ext/isomorfeus_ferret_ext/frt_hash.c +8 -8
- data/ext/isomorfeus_ferret_ext/frt_hash.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_hashset.c +20 -40
- data/ext/isomorfeus_ferret_ext/frt_hashset.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_helper.c +7 -15
- data/ext/isomorfeus_ferret_ext/frt_in_stream.c +482 -0
- data/ext/isomorfeus_ferret_ext/frt_in_stream.h +241 -0
- data/ext/isomorfeus_ferret_ext/frt_ind.c +20 -49
- data/ext/isomorfeus_ferret_ext/frt_ind.h +0 -1
- data/ext/isomorfeus_ferret_ext/frt_index.c +296 -1857
- data/ext/isomorfeus_ferret_ext/frt_index.h +2 -145
- data/ext/isomorfeus_ferret_ext/frt_lang.c +5 -10
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.c +29 -0
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.h +19 -0
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.c +93 -0
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.h +33 -0
- data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +102 -70
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +23 -46
- data/ext/isomorfeus_ferret_ext/frt_multimapper.h +4 -8
- data/ext/isomorfeus_ferret_ext/frt_out_stream.c +334 -0
- data/ext/isomorfeus_ferret_ext/frt_out_stream.h +198 -0
- data/ext/isomorfeus_ferret_ext/frt_posh.c +6 -819
- data/ext/isomorfeus_ferret_ext/frt_posh.h +0 -57
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +85 -171
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +49 -98
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +52 -104
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +6 -12
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +113 -226
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +134 -85
- data/ext/isomorfeus_ferret_ext/frt_ram_store.h +12 -0
- data/ext/isomorfeus_ferret_ext/frt_search.c +82 -164
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -536
- data/ext/isomorfeus_ferret_ext/frt_store.h +90 -495
- data/ext/isomorfeus_ferret_ext/frt_stream.h +18 -0
- data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_win32.h +5 -10
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +12 -11
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +11 -13
- data/ext/isomorfeus_ferret_ext/lz4.c +422 -195
- data/ext/isomorfeus_ferret_ext/lz4.h +114 -46
- data/ext/isomorfeus_ferret_ext/lz4frame.c +421 -242
- data/ext/isomorfeus_ferret_ext/lz4frame.h +122 -53
- data/ext/isomorfeus_ferret_ext/lz4hc.c +127 -111
- data/ext/isomorfeus_ferret_ext/lz4hc.h +14 -14
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +1 -1
- data/ext/isomorfeus_ferret_ext/mdbx.c +3762 -2526
- data/ext/isomorfeus_ferret_ext/mdbx.h +115 -70
- data/ext/isomorfeus_ferret_ext/test.c +40 -87
- data/ext/isomorfeus_ferret_ext/test.h +3 -6
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -13
- data/ext/isomorfeus_ferret_ext/test_analysis.c +32 -64
- data/ext/isomorfeus_ferret_ext/test_array.c +6 -12
- data/ext/isomorfeus_ferret_ext/test_bitvector.c +12 -24
- data/ext/isomorfeus_ferret_ext/test_document.c +23 -33
- data/ext/isomorfeus_ferret_ext/test_except.c +10 -21
- data/ext/isomorfeus_ferret_ext/test_fields.c +62 -68
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +15 -24
- data/ext/isomorfeus_ferret_ext/test_filter.c +17 -27
- data/ext/isomorfeus_ferret_ext/test_global.c +14 -29
- data/ext/isomorfeus_ferret_ext/test_hash.c +19 -38
- data/ext/isomorfeus_ferret_ext/test_hashset.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_helper.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +16 -28
- data/ext/isomorfeus_ferret_ext/test_index.c +277 -495
- data/ext/isomorfeus_ferret_ext/test_lang.c +7 -14
- data/ext/isomorfeus_ferret_ext/test_mdbx_store.c +2 -5
- data/ext/isomorfeus_ferret_ext/test_mempool.c +5 -10
- data/ext/isomorfeus_ferret_ext/test_multimapper.c +3 -6
- data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +9 -18
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +4 -6
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +3 -4
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_q_span.c +19 -35
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +14 -13
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -109
- data/ext/isomorfeus_ferret_ext/test_segments.c +8 -13
- data/ext/isomorfeus_ferret_ext/test_similarity.c +2 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +14 -24
- data/ext/isomorfeus_ferret_ext/test_store.c +96 -115
- data/ext/isomorfeus_ferret_ext/test_term.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -14
- data/ext/isomorfeus_ferret_ext/test_test.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_threading.c +15 -30
- data/ext/isomorfeus_ferret_ext/testhelper.c +11 -21
- data/ext/isomorfeus_ferret_ext/testhelper.h +1 -1
- data/ext/isomorfeus_ferret_ext/tests_all.h +1 -2
- data/lib/isomorfeus/ferret/index/index.rb +1 -12
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +43 -4
@@ -1,4 +1,6 @@
|
|
1
1
|
#include "frt_global.h"
|
2
|
+
#include "frt_lazy_doc_field.h"
|
3
|
+
#include "frt_lazy_doc.h"
|
2
4
|
#include "frt_index.h"
|
3
5
|
#include "frt_similarity.h"
|
4
6
|
#include "frt_helper.h"
|
@@ -6,13 +8,6 @@
|
|
6
8
|
#include <string.h>
|
7
9
|
#include <limits.h>
|
8
10
|
#include <ctype.h>
|
9
|
-
#include "brotli_decode.h"
|
10
|
-
#include "brotli_encode.h"
|
11
|
-
#include "bzlib.h"
|
12
|
-
#include "lz4frame.h"
|
13
|
-
|
14
|
-
// #undef close
|
15
|
-
// #undef read
|
16
11
|
|
17
12
|
extern rb_encoding *utf8_encoding;
|
18
13
|
extern void frt_micro_sleep(const int micro_seconds);
|
@@ -46,19 +41,15 @@ static char *ste_next(FrtTermEnum *te);
|
|
46
41
|
#define FORMAT 15
|
47
42
|
#define SEGMENTS_GEN_FILE_NAME "segments"
|
48
43
|
#define MAX_EXT_LEN 10
|
49
|
-
#define FRT_COMPRESSION_BUFFER_SIZE 16348
|
50
|
-
#define FRT_BROTLI_COMPRESSION_LEVEL 4
|
51
|
-
#define FRT_BZIP_COMPRESSION_LEVEL 9
|
52
44
|
|
53
45
|
/* *** Must be three characters *** */
|
54
46
|
static const char *INDEX_EXTENSIONS[] = {
|
55
|
-
"frq", "prx", "fdx", "fdt", "tfx", "tix", "tis", "del", "gen"
|
47
|
+
"frq", "prx", "fdx", "fdt", "tfx", "tix", "tis", "del", "gen"
|
56
48
|
};
|
57
49
|
|
58
50
|
static const char BASE36_DIGITMAP[] = "0123456789abcdefghijklmnopqrstuvwxyz";
|
59
51
|
|
60
|
-
static char *u64_to_str36(char *buf, int buf_size, frt_u64 u)
|
61
|
-
{
|
52
|
+
static char *u64_to_str36(char *buf, int buf_size, frt_u64 u) {
|
62
53
|
int i = buf_size - 1;
|
63
54
|
buf[i] = '\0';
|
64
55
|
for (i--; i >= 0; i--) {
|
@@ -75,17 +66,14 @@ static char *u64_to_str36(char *buf, int buf_size, frt_u64 u)
|
|
75
66
|
return buf + i;
|
76
67
|
}
|
77
68
|
|
78
|
-
static frt_u64 str36_to_u64(char *p)
|
79
|
-
{
|
69
|
+
static frt_u64 str36_to_u64(char *p) {
|
80
70
|
frt_u64 u = 0;
|
81
71
|
while (true) {
|
82
72
|
if ('0' <= *p && '9' >= *p) {
|
83
73
|
u = u * 36 + *p - '0';
|
84
|
-
}
|
85
|
-
else if ('a' <= *p && 'z' >= *p) {
|
74
|
+
} else if ('a' <= *p && 'z' >= *p) {
|
86
75
|
u = u * 36 + *p - 'a' + 10;
|
87
|
-
}
|
88
|
-
else {
|
76
|
+
} else {
|
89
77
|
break;
|
90
78
|
}
|
91
79
|
p++;
|
@@ -142,12 +130,10 @@ static char *fn_for_gen_field(char *buf,
|
|
142
130
|
const char *base,
|
143
131
|
const char *ext,
|
144
132
|
frt_i64 gen,
|
145
|
-
int field_num)
|
146
|
-
{
|
133
|
+
int field_num) {
|
147
134
|
if (-1 == gen) {
|
148
135
|
return NULL;
|
149
|
-
}
|
150
|
-
else {
|
136
|
+
} else {
|
151
137
|
char b[FRT_SEGMENT_NAME_MAX_LENGTH];
|
152
138
|
sprintf(buf, "%s_%s.%s%d",
|
153
139
|
base,
|
@@ -164,18 +150,15 @@ static char *fn_for_gen_field(char *buf,
|
|
164
150
|
*
|
165
151
|
***************************************************************************/
|
166
152
|
|
167
|
-
static unsigned long co_hash(const void *key)
|
168
|
-
|
169
|
-
return (unsigned long)key;
|
153
|
+
static unsigned long co_hash(const void *key) {
|
154
|
+
return (unsigned long)(uintptr_t)key;
|
170
155
|
}
|
171
156
|
|
172
|
-
static int co_eq(const void *key1, const void *key2)
|
173
|
-
{
|
157
|
+
static int co_eq(const void *key1, const void *key2) {
|
174
158
|
return (key1 == key2);
|
175
159
|
}
|
176
160
|
|
177
|
-
static void co_destroy(FrtCacheObject *self)
|
178
|
-
{
|
161
|
+
static void co_destroy(FrtCacheObject *self) {
|
179
162
|
frt_h_rem(self->ref_tab1, self->ref2, false);
|
180
163
|
frt_h_rem(self->ref_tab2, self->ref1, false);
|
181
164
|
self->destroy(self->obj);
|
@@ -183,8 +166,7 @@ static void co_destroy(FrtCacheObject *self)
|
|
183
166
|
}
|
184
167
|
|
185
168
|
FrtCacheObject *frt_co_create(FrtHash *ref_tab1, FrtHash *ref_tab2,
|
186
|
-
void *ref1, void *ref2, frt_free_ft destroy, void *obj)
|
187
|
-
{
|
169
|
+
void *ref1, void *ref2, frt_free_ft destroy, void *obj) {
|
188
170
|
FrtCacheObject *self = FRT_ALLOC(FrtCacheObject);
|
189
171
|
frt_h_set(ref_tab1, ref2, self);
|
190
172
|
frt_h_set(ref_tab2, ref1, self);
|
@@ -201,302 +183,13 @@ FrtHash *frt_co_hash_create(void) {
|
|
201
183
|
return frt_h_new(&co_hash, &co_eq, (frt_free_ft)NULL, (frt_free_ft)&co_destroy);
|
202
184
|
}
|
203
185
|
|
204
|
-
/****************************************************************************
|
205
|
-
*
|
206
|
-
* FieldInfo
|
207
|
-
*
|
208
|
-
****************************************************************************/
|
209
|
-
|
210
|
-
static void fi_check_params(unsigned int bits) {
|
211
|
-
if (!bits_is_indexed(bits) && bits_store_term_vector(bits)) {
|
212
|
-
FRT_RAISE(FRT_ARG_ERROR, "You can't store the term vectors of an unindexed field.");
|
213
|
-
}
|
214
|
-
if (bits_is_compressed(bits) && !bits_is_stored(bits)) {
|
215
|
-
FRT_RAISE(FRT_ARG_ERROR, "Field must be stored for compression to be useful.");
|
216
|
-
}
|
217
|
-
}
|
218
|
-
|
219
|
-
FrtFieldInfo *frt_fi_alloc(void) {
|
220
|
-
return FRT_ALLOC(FrtFieldInfo);
|
221
|
-
}
|
222
|
-
|
223
|
-
FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, unsigned int bits) {
|
224
|
-
assert(NULL != name);
|
225
|
-
fi_check_params(bits);
|
226
|
-
fi->name = name;
|
227
|
-
fi->boost = 1.0f;
|
228
|
-
fi->bits = bits;
|
229
|
-
fi->number = 0;
|
230
|
-
fi->ref_cnt = 1;
|
231
|
-
fi->rfi = Qnil;
|
232
|
-
return fi;
|
233
|
-
}
|
234
|
-
|
235
|
-
FrtFieldInfo *frt_fi_new(ID name, unsigned int bits) {
|
236
|
-
FrtFieldInfo *fi = frt_fi_alloc();
|
237
|
-
return frt_fi_init(fi, name, bits);
|
238
|
-
}
|
239
|
-
|
240
|
-
void frt_fi_deref(FrtFieldInfo *fi) {
|
241
|
-
if (FRT_DEREF(fi) == 0) free(fi);
|
242
|
-
}
|
243
|
-
|
244
|
-
FrtCompressionType frt_fi_get_compression(FrtFieldInfo *fi) {
|
245
|
-
if (bits_is_compressed(fi->bits)) {
|
246
|
-
if (bits_is_compressed_brotli(fi->bits)) {
|
247
|
-
return FRT_COMPRESSION_BROTLI;
|
248
|
-
} else if (bits_is_compressed_bz2(fi->bits)) {
|
249
|
-
return FRT_COMPRESSION_BZ2;
|
250
|
-
} else if (bits_is_compressed_lz4(fi->bits)) {
|
251
|
-
return FRT_COMPRESSION_LZ4;
|
252
|
-
} else {
|
253
|
-
return FRT_COMPRESSION_BROTLI;
|
254
|
-
}
|
255
|
-
} else {
|
256
|
-
return FRT_COMPRESSION_NONE;
|
257
|
-
}
|
258
|
-
}
|
259
|
-
|
260
|
-
char *frt_fi_to_s(FrtFieldInfo *fi)
|
261
|
-
{
|
262
|
-
const char *fi_name = rb_id2name(fi->name);
|
263
|
-
char *str = FRT_ALLOC_N(char, strlen(fi_name) + 200);
|
264
|
-
char *s = str;
|
265
|
-
s += sprintf(str, "[\"%s\":(%s%s%s%s%s%s%s%s", fi_name,
|
266
|
-
bits_is_stored(fi->bits) ? "is_stored, " : "",
|
267
|
-
bits_is_compressed(fi->bits) ? "is_compressed, " : "",
|
268
|
-
bits_is_indexed(fi->bits) ? "is_indexed, " : "",
|
269
|
-
bits_is_tokenized(fi->bits) ? "is_tokenized, " : "",
|
270
|
-
bits_omit_norms(fi->bits) ? "omit_norms, " : "",
|
271
|
-
bits_store_term_vector(fi->bits) ? "store_term_vector, " : "",
|
272
|
-
bits_store_positions(fi->bits) ? "store_positions, " : "",
|
273
|
-
bits_store_offsets(fi->bits) ? "store_offsets, " : "");
|
274
|
-
s -= 2;
|
275
|
-
if (*s != ',') {
|
276
|
-
s += 2;
|
277
|
-
}
|
278
|
-
|
279
|
-
sprintf(s, ")]");
|
280
|
-
return str;
|
281
|
-
}
|
282
|
-
|
283
|
-
/****************************************************************************
|
284
|
-
*
|
285
|
-
* FieldInfos
|
286
|
-
*
|
287
|
-
****************************************************************************/
|
288
|
-
|
289
|
-
FrtFieldInfos *frt_fis_alloc(void) {
|
290
|
-
return FRT_ALLOC(FrtFieldInfos);
|
291
|
-
}
|
292
|
-
|
293
|
-
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, unsigned int bits) {
|
294
|
-
fi_check_params(bits);
|
295
|
-
fis->field_dict = frt_h_new_ptr((frt_free_ft)&frt_fi_deref);
|
296
|
-
fis->size = 0;
|
297
|
-
fis->capa = FIELD_INFOS_INIT_CAPA;
|
298
|
-
fis->fields = FRT_ALLOC_N(FrtFieldInfo *, fis->capa);
|
299
|
-
fis->bits = bits;
|
300
|
-
fis->ref_cnt = 1;
|
301
|
-
fis->rfis = Qnil;
|
302
|
-
return fis;
|
303
|
-
}
|
304
|
-
|
305
|
-
FrtFieldInfos *frt_fis_new(unsigned int bits) {
|
306
|
-
FrtFieldInfos *fis = frt_fis_alloc();
|
307
|
-
return frt_fis_init(fis, bits);
|
308
|
-
}
|
309
|
-
|
310
|
-
FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi) {
|
311
|
-
if (fis->size == fis->capa) {
|
312
|
-
fis->capa <<= 1;
|
313
|
-
FRT_REALLOC_N(fis->fields, FrtFieldInfo *, fis->capa);
|
314
|
-
}
|
315
|
-
if (!frt_h_set_safe(fis->field_dict, (void *)fi->name, fi)) {
|
316
|
-
FRT_RAISE(FRT_ARG_ERROR, "Field :%s already exists", rb_id2name(fi->name));
|
317
|
-
}
|
318
|
-
FRT_REF(fi);
|
319
|
-
fi->number = fis->size;
|
320
|
-
fis->fields[fis->size] = fi;
|
321
|
-
fis->size++;
|
322
|
-
return fi;
|
323
|
-
}
|
324
|
-
|
325
|
-
FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name) {
|
326
|
-
return (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
|
327
|
-
}
|
328
|
-
|
329
|
-
int frt_fis_get_field_num(FrtFieldInfos *fis, ID name) {
|
330
|
-
FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
|
331
|
-
if (fi) { return fi->number; }
|
332
|
-
else { return -1; }
|
333
|
-
}
|
334
|
-
|
335
|
-
FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name) {
|
336
|
-
FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
|
337
|
-
if (!fi) {
|
338
|
-
fi = (FrtFieldInfo*)frt_fi_new(name, fis->bits);
|
339
|
-
frt_fis_add_field(fis, fi);
|
340
|
-
}
|
341
|
-
return fi;
|
342
|
-
}
|
343
|
-
|
344
|
-
FrtFieldInfos *frt_fis_read(FrtInStream *is)
|
345
|
-
{
|
346
|
-
FrtFieldInfos *volatile fis = NULL;
|
347
|
-
char *field_name;
|
348
|
-
FRT_TRY
|
349
|
-
do {
|
350
|
-
volatile int i;
|
351
|
-
union { frt_u32 i; float f; } tmp;
|
352
|
-
FrtFieldInfo *volatile fi;
|
353
|
-
fis = frt_fis_new(frt_is_read_vint(is));
|
354
|
-
for (i = frt_is_read_vint(is); i > 0; i--) {
|
355
|
-
fi = FRT_ALLOC_AND_ZERO(FrtFieldInfo);
|
356
|
-
FRT_TRY
|
357
|
-
field_name = frt_is_read_string_safe(is);
|
358
|
-
fi->name = rb_intern(field_name);
|
359
|
-
free(field_name);
|
360
|
-
tmp.i = frt_is_read_u32(is);
|
361
|
-
fi->boost = tmp.f;
|
362
|
-
fi->bits = frt_is_read_vint(is);
|
363
|
-
FRT_XCATCHALL
|
364
|
-
free(fi);
|
365
|
-
FRT_XENDTRY
|
366
|
-
frt_fis_add_field(fis, fi);
|
367
|
-
fi->ref_cnt = 1;
|
368
|
-
}
|
369
|
-
} while (0);
|
370
|
-
FRT_XCATCHALL
|
371
|
-
frt_fis_deref(fis);
|
372
|
-
FRT_XENDTRY
|
373
|
-
return fis;
|
374
|
-
}
|
375
|
-
|
376
|
-
void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os)
|
377
|
-
{
|
378
|
-
int i;
|
379
|
-
union { frt_u32 i; float f; } tmp;
|
380
|
-
FrtFieldInfo *fi;
|
381
|
-
const int fis_size = fis->size;
|
382
|
-
|
383
|
-
frt_os_write_vint(os, fis->bits);
|
384
|
-
frt_os_write_vint(os, fis->size);
|
385
|
-
|
386
|
-
for (i = 0; i < fis_size; i++) {
|
387
|
-
fi = fis->fields[i];
|
388
|
-
|
389
|
-
frt_os_write_string(os, rb_id2name(fi->name));
|
390
|
-
tmp.f = fi->boost;
|
391
|
-
frt_os_write_u32(os, tmp.i);
|
392
|
-
frt_os_write_vint(os, fi->bits);
|
393
|
-
}
|
394
|
-
}
|
395
|
-
|
396
|
-
static const char *store_str[] = {
|
397
|
-
":no",
|
398
|
-
":yes",
|
399
|
-
"",
|
400
|
-
":compressed"
|
401
|
-
};
|
402
|
-
|
403
|
-
static const char *fi_store_str(FrtFieldInfo *fi)
|
404
|
-
{
|
405
|
-
return store_str[fi->bits & 0x3];
|
406
|
-
}
|
407
|
-
|
408
|
-
static const char *index_str[] = {
|
409
|
-
":no",
|
410
|
-
":untokenized",
|
411
|
-
"",
|
412
|
-
":yes",
|
413
|
-
"",
|
414
|
-
":untokenized_omit_norms",
|
415
|
-
"",
|
416
|
-
":omit_norms"
|
417
|
-
};
|
418
|
-
|
419
|
-
static const char *fi_index_str(FrtFieldInfo *fi)
|
420
|
-
{
|
421
|
-
return index_str[(fi->bits >> 2) & 0x7];
|
422
|
-
}
|
423
|
-
|
424
|
-
static const char *term_vector_str[] = {
|
425
|
-
":no",
|
426
|
-
":yes",
|
427
|
-
"",
|
428
|
-
":with_positions",
|
429
|
-
"",
|
430
|
-
":with_offsets",
|
431
|
-
"",
|
432
|
-
":with_positions_offsets"
|
433
|
-
};
|
434
|
-
|
435
|
-
static const char *fi_term_vector_str(FrtFieldInfo *fi)
|
436
|
-
{
|
437
|
-
return term_vector_str[(fi->bits >> 5) & 0x7];
|
438
|
-
}
|
439
|
-
|
440
|
-
char *frt_fis_to_s(FrtFieldInfos *fis)
|
441
|
-
{
|
442
|
-
int i, pos, capa = 200 + fis->size * 120;
|
443
|
-
char *buf = FRT_ALLOC_N(char, capa);
|
444
|
-
FrtFieldInfo *fi;
|
445
|
-
const int fis_size = fis->size;
|
446
|
-
|
447
|
-
pos = sprintf(buf,
|
448
|
-
"default:\n"
|
449
|
-
" store: %s\n"
|
450
|
-
" index: %s\n"
|
451
|
-
" term_vector: %s\n"
|
452
|
-
"fields:\n",
|
453
|
-
store_str[fis->bits & 0x3],
|
454
|
-
index_str[(fis->bits >> 2) & 0x7],
|
455
|
-
term_vector_str[(fis->bits >> 5) & 0x7]);
|
456
|
-
for (i = 0; i < fis_size; i++) {
|
457
|
-
fi = fis->fields[i];
|
458
|
-
pos += sprintf(buf + pos,
|
459
|
-
" %s:\n"
|
460
|
-
" boost: %f\n"
|
461
|
-
" store: %s\n"
|
462
|
-
" index: %s\n"
|
463
|
-
" term_vector: %s\n",
|
464
|
-
rb_id2name(fi->name), fi->boost, fi_store_str(fi),
|
465
|
-
fi_index_str(fi), fi_term_vector_str(fi));
|
466
|
-
}
|
467
|
-
|
468
|
-
return buf;
|
469
|
-
}
|
470
|
-
|
471
|
-
void frt_fis_deref(FrtFieldInfos *fis) {
|
472
|
-
if (FRT_DEREF(fis) == 0) {
|
473
|
-
frt_h_destroy(fis->field_dict);
|
474
|
-
free(fis->fields);
|
475
|
-
free(fis);
|
476
|
-
}
|
477
|
-
}
|
478
|
-
|
479
|
-
static bool fis_has_vectors(FrtFieldInfos *fis)
|
480
|
-
{
|
481
|
-
int i;
|
482
|
-
const int fis_size = fis->size;
|
483
|
-
|
484
|
-
for (i = 0; i < fis_size; i++) {
|
485
|
-
if (bits_store_term_vector(fis->fields[i]->bits)) {
|
486
|
-
return true;
|
487
|
-
}
|
488
|
-
}
|
489
|
-
return false;
|
490
|
-
}
|
491
|
-
|
492
186
|
/****************************************************************************
|
493
187
|
*
|
494
188
|
* SegmentInfo
|
495
189
|
*
|
496
190
|
****************************************************************************/
|
497
191
|
|
498
|
-
FrtSegmentInfo *frt_si_new(char *name, int doc_cnt, FrtStore *store)
|
499
|
-
{
|
192
|
+
FrtSegmentInfo *frt_si_new(char *name, int doc_cnt, FrtStore *store) {
|
500
193
|
FrtSegmentInfo *si = FRT_ALLOC(FrtSegmentInfo);
|
501
194
|
si->name = name;
|
502
195
|
si->doc_cnt = doc_cnt;
|
@@ -509,8 +202,7 @@ FrtSegmentInfo *frt_si_new(char *name, int doc_cnt, FrtStore *store)
|
|
509
202
|
return si;
|
510
203
|
}
|
511
204
|
|
512
|
-
static FrtSegmentInfo *si_read(FrtStore *store, FrtInStream *is)
|
513
|
-
{
|
205
|
+
static FrtSegmentInfo *si_read(FrtStore *store, FrtInStream *is) {
|
514
206
|
FrtSegmentInfo *volatile si = FRT_ALLOC_AND_ZERO(FrtSegmentInfo);
|
515
207
|
FRT_TRY
|
516
208
|
si->store = store;
|
@@ -535,8 +227,7 @@ static FrtSegmentInfo *si_read(FrtStore *store, FrtInStream *is)
|
|
535
227
|
return si;
|
536
228
|
}
|
537
229
|
|
538
|
-
static void si_write(FrtSegmentInfo *si, FrtOutStream *os)
|
539
|
-
{
|
230
|
+
static void si_write(FrtSegmentInfo *si, FrtOutStream *os) {
|
540
231
|
frt_os_write_string(os, si->name);
|
541
232
|
frt_os_write_vint(os, si->doc_cnt);
|
542
233
|
frt_os_write_vint(os, si->del_gen);
|
@@ -558,13 +249,11 @@ void frt_si_close(FrtSegmentInfo *si) {
|
|
558
249
|
}
|
559
250
|
}
|
560
251
|
|
561
|
-
bool frt_si_has_deletions(FrtSegmentInfo *si)
|
562
|
-
{
|
252
|
+
bool frt_si_has_deletions(FrtSegmentInfo *si) {
|
563
253
|
return si->del_gen >= 0;
|
564
254
|
}
|
565
255
|
|
566
|
-
void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num)
|
567
|
-
{
|
256
|
+
void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num) {
|
568
257
|
if (field_num >= si->norm_gens_size) {
|
569
258
|
int i;
|
570
259
|
FRT_REALLOC_N(si->norm_gens, int, field_num + 1);
|
@@ -576,8 +265,7 @@ void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num)
|
|
576
265
|
si->norm_gens[field_num]++;
|
577
266
|
}
|
578
267
|
|
579
|
-
static char *si_norm_file_name(FrtSegmentInfo *si, char *buf, int field_num)
|
580
|
-
{
|
268
|
+
static char *si_norm_file_name(FrtSegmentInfo *si, char *buf, int field_num) {
|
581
269
|
int norm_gen;
|
582
270
|
if (field_num >= si->norm_gens_size
|
583
271
|
|| 0 > (norm_gen = si->norm_gens[field_num])) {
|
@@ -590,38 +278,13 @@ static char *si_norm_file_name(FrtSegmentInfo *si, char *buf, int field_num)
|
|
590
278
|
|
591
279
|
void frt_deleter_queue_file(FrtDeleter *dlr, const char *file_name);
|
592
280
|
|
593
|
-
static void si_delete_files(FrtSegmentInfo *si, FrtFieldInfos *fis, FrtDeleter *dlr)
|
594
|
-
{
|
595
|
-
int i;
|
596
|
-
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
597
|
-
size_t seg_len = strlen(si->name);
|
598
|
-
char *ext;
|
599
|
-
|
600
|
-
for (i = si->norm_gens_size - 1; i >= 0; i--) {
|
601
|
-
if (0 <= si->norm_gens[i]) {
|
602
|
-
frt_deleter_queue_file(dlr, si_norm_file_name(si, file_name, fis->fields[i]->number));
|
603
|
-
}
|
604
|
-
}
|
605
|
-
|
606
|
-
memcpy(file_name, si->name, seg_len);
|
607
|
-
file_name[seg_len] = '.';
|
608
|
-
ext = file_name + seg_len + 1;
|
609
|
-
|
610
|
-
for (i = FRT_NELEMS(INDEX_EXTENSIONS) - 1; i >= 0; i--) {
|
611
|
-
memcpy(ext, INDEX_EXTENSIONS[i], 4);
|
612
|
-
frt_deleter_queue_file(dlr, file_name);
|
613
|
-
}
|
614
|
-
}
|
615
|
-
|
616
281
|
/****************************************************************************
|
617
282
|
*
|
618
283
|
* SegmentInfos
|
619
284
|
*
|
620
285
|
****************************************************************************/
|
621
286
|
|
622
|
-
|
623
|
-
static char *new_segment(frt_i64 generation)
|
624
|
-
{
|
287
|
+
static char *new_segment(frt_i64 generation) {
|
625
288
|
char buf[FRT_SEGMENT_NAME_MAX_LENGTH];
|
626
289
|
char *fn_p = u64_to_str36(buf, FRT_SEGMENT_NAME_MAX_LENGTH - 1,
|
627
290
|
(frt_u64)generation);
|
@@ -642,8 +305,7 @@ typedef struct FindSegmentsFile {
|
|
642
305
|
} ret;
|
643
306
|
} FindSegmentsFile;
|
644
307
|
|
645
|
-
static void which_gen_i(const char *file_name, void *arg)
|
646
|
-
{
|
308
|
+
static void which_gen_i(const char *file_name, void *arg) {
|
647
309
|
frt_i64 *max_generation = (frt_i64 *)arg;
|
648
310
|
if (0 == strncmp(FRT_SEGMENTS_FILE_NAME"_", file_name,
|
649
311
|
sizeof(FRT_SEGMENTS_FILE_NAME))) {
|
@@ -689,10 +351,9 @@ void frt_sis_put(FrtSegmentInfos *sis, FILE *stream) {
|
|
689
351
|
*
|
690
352
|
* @param store - the Store to look in
|
691
353
|
*/
|
692
|
-
frt_i64 frt_sis_current_segment_generation(FrtStore *store)
|
693
|
-
{
|
354
|
+
frt_i64 frt_sis_current_segment_generation(FrtStore *store) {
|
694
355
|
frt_i64 current_generation = -1;
|
695
|
-
store->each(store, &which_gen_i, &current_generation);
|
356
|
+
store->each(store, segm_idx_name, &which_gen_i, &current_generation);
|
696
357
|
return current_generation;
|
697
358
|
}
|
698
359
|
|
@@ -703,8 +364,7 @@ frt_i64 frt_sis_current_segment_generation(FrtStore *store)
|
|
703
364
|
* @param store - the Store to look in
|
704
365
|
* @return segments_N where N is the current generation
|
705
366
|
*/
|
706
|
-
char *frt_sis_curr_seg_file_name(char *buf, FrtStore *store)
|
707
|
-
{
|
367
|
+
char *frt_sis_curr_seg_file_name(char *buf, FrtStore *store) {
|
708
368
|
return segfn_for_generation(buf, frt_sis_current_segment_generation(store));
|
709
369
|
}
|
710
370
|
|
@@ -717,16 +377,14 @@ char *frt_sis_curr_seg_file_name(char *buf, FrtStore *store)
|
|
717
377
|
*/
|
718
378
|
/*
|
719
379
|
FIXME: not used
|
720
|
-
static char *sis_next_seg_file_name(char *buf, FrtStore *store)
|
721
|
-
{
|
380
|
+
static char *sis_next_seg_file_name(char *buf, FrtStore *store) {
|
722
381
|
return segfn_for_generation(buf, frt_sis_current_segment_generation(store) + 1);
|
723
382
|
}
|
724
383
|
*/
|
725
384
|
|
726
385
|
#define GEN_FILE_RETRY_COUNT 10
|
727
386
|
#define GEN_LOOK_AHEAD_COUNT 10
|
728
|
-
static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void (*run)(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir), FrtIndexReader *ir)
|
729
|
-
{
|
387
|
+
static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void (*run)(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir), FrtIndexReader *ir) {
|
730
388
|
volatile int i;
|
731
389
|
volatile int gen_look_ahead_count = 0;
|
732
390
|
volatile bool retry = false;
|
@@ -763,7 +421,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void
|
|
763
421
|
FrtInStream *gen_is;
|
764
422
|
gen_is = NULL;
|
765
423
|
FRT_TRY
|
766
|
-
gen_is = store->open_input(store, SEGMENTS_GEN_FILE_NAME);
|
424
|
+
gen_is = store->open_input(store, segm_idx_name, SEGMENTS_GEN_FILE_NAME);
|
767
425
|
FRT_XCATCHALL
|
768
426
|
FRT_HANDLED();
|
769
427
|
/* TODO:LOG "segments open: FRT_IO_ERROR"*/
|
@@ -813,7 +471,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void
|
|
813
471
|
* this must be a real error. We throw the original exception
|
814
472
|
* we got. */
|
815
473
|
char *listing, listing_buffer[1024];
|
816
|
-
listing =
|
474
|
+
listing = frt_store_folder_to_s(store, segm_idx_name);
|
817
475
|
strncpy(listing_buffer, listing, 1023);
|
818
476
|
listing_buffer[1023] = '\0';
|
819
477
|
free(listing);
|
@@ -846,7 +504,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void
|
|
846
504
|
* and try it if so: */
|
847
505
|
char prev_seg_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
848
506
|
segfn_for_generation(prev_seg_file_name, gen - 1);
|
849
|
-
if (store->exists(store, prev_seg_file_name)) {
|
507
|
+
if (store->exists(store, segm_idx_name, prev_seg_file_name)) {
|
850
508
|
/* TODO:LOG "fallback to prior segment file '" +
|
851
509
|
* prevSegmentFileName + "'" */
|
852
510
|
FRT_TRY
|
@@ -870,8 +528,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void
|
|
870
528
|
}
|
871
529
|
}
|
872
530
|
|
873
|
-
FrtSegmentInfos *frt_sis_new(FrtFieldInfos *fis)
|
874
|
-
{
|
531
|
+
FrtSegmentInfos *frt_sis_new(FrtFieldInfos *fis) {
|
875
532
|
FrtSegmentInfos *sis = FRT_ALLOC_AND_ZERO(FrtSegmentInfos);
|
876
533
|
FRT_REF(fis);
|
877
534
|
sis->fis = fis;
|
@@ -885,13 +542,11 @@ FrtSegmentInfos *frt_sis_new(FrtFieldInfos *fis)
|
|
885
542
|
return sis;
|
886
543
|
}
|
887
544
|
|
888
|
-
FrtSegmentInfo *frt_sis_new_segment(FrtSegmentInfos *sis, int doc_cnt, FrtStore *store)
|
889
|
-
{
|
545
|
+
FrtSegmentInfo *frt_sis_new_segment(FrtSegmentInfos *sis, int doc_cnt, FrtStore *store) {
|
890
546
|
return frt_sis_add_si(sis, frt_si_new(new_segment(sis->counter++), doc_cnt, store));
|
891
547
|
}
|
892
548
|
|
893
|
-
void frt_sis_destroy(FrtSegmentInfos *sis)
|
894
|
-
{
|
549
|
+
void frt_sis_destroy(FrtSegmentInfos *sis) {
|
895
550
|
int i;
|
896
551
|
const int sis_size = sis->size;
|
897
552
|
for (i = 0; i < sis_size; i++) {
|
@@ -903,8 +558,7 @@ void frt_sis_destroy(FrtSegmentInfos *sis)
|
|
903
558
|
free(sis);
|
904
559
|
}
|
905
560
|
|
906
|
-
FrtSegmentInfo *frt_sis_add_si(FrtSegmentInfos *sis, FrtSegmentInfo *si)
|
907
|
-
{
|
561
|
+
FrtSegmentInfo *frt_sis_add_si(FrtSegmentInfos *sis, FrtSegmentInfo *si) {
|
908
562
|
if (sis->size >= sis->capa) {
|
909
563
|
sis->capa <<= 1;
|
910
564
|
FRT_REALLOC_N(sis->segs, FrtSegmentInfo *, sis->capa);
|
@@ -913,8 +567,7 @@ FrtSegmentInfo *frt_sis_add_si(FrtSegmentInfos *sis, FrtSegmentInfo *si)
|
|
913
567
|
return si;
|
914
568
|
}
|
915
569
|
|
916
|
-
void frt_sis_del_at(FrtSegmentInfos *sis, int at)
|
917
|
-
{
|
570
|
+
void frt_sis_del_at(FrtSegmentInfos *sis, int at) {
|
918
571
|
int i;
|
919
572
|
const int sis_size = --(sis->size);
|
920
573
|
frt_si_close(sis->segs[at]);
|
@@ -923,8 +576,7 @@ void frt_sis_del_at(FrtSegmentInfos *sis, int at)
|
|
923
576
|
}
|
924
577
|
}
|
925
578
|
|
926
|
-
void frt_sis_del_from_to(FrtSegmentInfos *sis, int from, int to)
|
927
|
-
{
|
579
|
+
void frt_sis_del_from_to(FrtSegmentInfos *sis, int from, int to) {
|
928
580
|
int i, num_to_del = to - from;
|
929
581
|
const int sis_size = sis->size -= num_to_del;
|
930
582
|
for (i = from; i < to; i++) {
|
@@ -935,8 +587,7 @@ void frt_sis_del_from_to(FrtSegmentInfos *sis, int from, int to)
|
|
935
587
|
}
|
936
588
|
}
|
937
589
|
|
938
|
-
static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_)
|
939
|
-
{
|
590
|
+
static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_) {
|
940
591
|
int seg_cnt;
|
941
592
|
int i;
|
942
593
|
frt_u32 format = 0;
|
@@ -947,7 +598,7 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReade
|
|
947
598
|
segfn_for_generation(seg_file_name, fsf->generation);
|
948
599
|
fsf->ret.sis = NULL;
|
949
600
|
FRT_TRY
|
950
|
-
is = store->open_input(store, seg_file_name);
|
601
|
+
is = store->open_input(store, segm_idx_name, seg_file_name);
|
951
602
|
sis->store = store;
|
952
603
|
FRT_REF(store);
|
953
604
|
sis->generation = fsf->generation;
|
@@ -973,22 +624,20 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReade
|
|
973
624
|
fsf->ret.sis = sis;
|
974
625
|
}
|
975
626
|
|
976
|
-
FrtSegmentInfos *frt_sis_read(FrtStore *store)
|
977
|
-
{
|
627
|
+
FrtSegmentInfos *frt_sis_read(FrtStore *store) {
|
978
628
|
FindSegmentsFile fsf;
|
979
629
|
sis_find_segments_file(store, &fsf, &frt_sis_read_i, NULL);
|
980
630
|
return fsf.ret.sis;
|
981
631
|
}
|
982
632
|
|
983
|
-
void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter)
|
984
|
-
{
|
633
|
+
void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter) {
|
985
634
|
int i;
|
986
635
|
FrtOutStream *volatile os = NULL;
|
987
636
|
const int sis_size = sis->size;
|
988
637
|
char buf[FRT_SEGMENT_NAME_MAX_LENGTH];
|
989
638
|
sis->generation++;
|
990
639
|
FRT_TRY
|
991
|
-
os = store->new_output(store, segfn_for_generation(buf, sis->generation));
|
640
|
+
os = store->new_output(store, segm_idx_name, segfn_for_generation(buf, sis->generation));
|
992
641
|
frt_os_write_u32(os, FORMAT);
|
993
642
|
frt_os_write_u64(os, ++(sis->version)); /* every write changes the index */
|
994
643
|
frt_os_write_u64(os, sis->counter);
|
@@ -1002,7 +651,7 @@ void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter)
|
|
1002
651
|
FRT_XENDTRY
|
1003
652
|
|
1004
653
|
FRT_TRY
|
1005
|
-
os = store->new_output(store, SEGMENTS_GEN_FILE_NAME);
|
654
|
+
os = store->new_output(store, segm_idx_name, SEGMENTS_GEN_FILE_NAME);
|
1006
655
|
frt_os_write_u64(os, sis->generation);
|
1007
656
|
frt_os_write_u64(os, sis->generation);
|
1008
657
|
FRT_XFINALLY
|
@@ -1016,15 +665,14 @@ void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter)
|
|
1016
665
|
}
|
1017
666
|
}
|
1018
667
|
|
1019
|
-
static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_)
|
1020
|
-
{
|
668
|
+
static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_) {
|
1021
669
|
FrtInStream *is;
|
1022
670
|
frt_u32 format = 0;
|
1023
671
|
frt_u64 version = 0;
|
1024
672
|
char seg_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
1025
673
|
|
1026
674
|
segfn_for_generation(seg_file_name, (frt_u64)fsf->generation);
|
1027
|
-
is = store->open_input(store, seg_file_name);
|
675
|
+
is = store->open_input(store, segm_idx_name, seg_file_name);
|
1028
676
|
|
1029
677
|
FRT_TRY
|
1030
678
|
format = frt_is_read_u32(is); // format
|
@@ -1037,362 +685,12 @@ static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexR
|
|
1037
685
|
fsf->ret.uint64 = version;
|
1038
686
|
}
|
1039
687
|
|
1040
|
-
frt_u64 frt_sis_read_current_version(FrtStore *store)
|
1041
|
-
{
|
688
|
+
frt_u64 frt_sis_read_current_version(FrtStore *store) {
|
1042
689
|
FindSegmentsFile fsf;
|
1043
690
|
sis_find_segments_file(store, &fsf, &frt_sis_read_ver_i, NULL);
|
1044
691
|
return fsf.ret.uint64;
|
1045
692
|
}
|
1046
693
|
|
1047
|
-
/****************************************************************************
|
1048
|
-
*
|
1049
|
-
* LazyDocField
|
1050
|
-
*
|
1051
|
-
****************************************************************************/
|
1052
|
-
|
1053
|
-
static FrtLazyDocField *lazy_df_new(ID name, const int size, FrtCompressionType compression) {
|
1054
|
-
FrtLazyDocField *self = FRT_ALLOC(FrtLazyDocField);
|
1055
|
-
self->name = name;
|
1056
|
-
self->size = size;
|
1057
|
-
self->data = FRT_ALLOC_AND_ZERO_N(FrtLazyDocFieldData, size);
|
1058
|
-
self->compression = compression;
|
1059
|
-
self->decompressed = false;
|
1060
|
-
self->loaded = false;
|
1061
|
-
return self;
|
1062
|
-
}
|
1063
|
-
|
1064
|
-
static void lazy_df_destroy(FrtLazyDocField *self) {
|
1065
|
-
int i;
|
1066
|
-
for (i = self->size - 1; i >= 0; i--) {
|
1067
|
-
if (self->data[i].text) {
|
1068
|
-
free(self->data[i].text);
|
1069
|
-
}
|
1070
|
-
}
|
1071
|
-
free(self->data);
|
1072
|
-
free(self);
|
1073
|
-
}
|
1074
|
-
|
1075
|
-
static void comp_raise(void) {
|
1076
|
-
FRT_RAISE(EXCEPTION, "Compression error");
|
1077
|
-
}
|
1078
|
-
|
1079
|
-
static char *is_read_brotli_compressed_bytes(FrtInStream *is, int compressed_len, int *len) {
|
1080
|
-
int buf_out_idx = 0;
|
1081
|
-
int read_len;
|
1082
|
-
frt_uchar buf_in[FRT_COMPRESSION_BUFFER_SIZE];
|
1083
|
-
const frt_uchar *next_in;
|
1084
|
-
size_t available_in;
|
1085
|
-
frt_uchar *buf_out = NULL;
|
1086
|
-
frt_uchar *next_out;
|
1087
|
-
size_t available_out;
|
1088
|
-
|
1089
|
-
BrotliDecoderState *b_state = BrotliDecoderCreateInstance(NULL, NULL, NULL);
|
1090
|
-
BrotliDecoderResult b_result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
|
1091
|
-
if (!b_state) { comp_raise(); return NULL; }
|
1092
|
-
|
1093
|
-
do {
|
1094
|
-
read_len = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
|
1095
|
-
frt_is_read_bytes(is, buf_in, read_len);
|
1096
|
-
compressed_len -= read_len;
|
1097
|
-
available_in = read_len;
|
1098
|
-
next_in = buf_in;
|
1099
|
-
available_out = FRT_COMPRESSION_BUFFER_SIZE;
|
1100
|
-
do {
|
1101
|
-
FRT_REALLOC_N(buf_out, frt_uchar, buf_out_idx + FRT_COMPRESSION_BUFFER_SIZE);
|
1102
|
-
next_out = buf_out + buf_out_idx;
|
1103
|
-
b_result = BrotliDecoderDecompressStream(b_state,
|
1104
|
-
&available_in, &next_in,
|
1105
|
-
&available_out, &next_out, NULL);
|
1106
|
-
if (b_result == BROTLI_DECODER_RESULT_ERROR) { comp_raise(); return NULL; }
|
1107
|
-
buf_out_idx += FRT_COMPRESSION_BUFFER_SIZE - available_out;
|
1108
|
-
} while (b_result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT);
|
1109
|
-
} while (b_result != BROTLI_DECODER_RESULT_SUCCESS && compressed_len > 0);
|
1110
|
-
|
1111
|
-
BrotliDecoderDestroyInstance(b_state);
|
1112
|
-
|
1113
|
-
FRT_REALLOC_N(buf_out, frt_uchar, buf_out_idx + 1);
|
1114
|
-
buf_out[buf_out_idx] = '\0';
|
1115
|
-
*len = buf_out_idx;
|
1116
|
-
return (char *)buf_out;
|
1117
|
-
}
|
1118
|
-
|
1119
|
-
static void zraise(int ret) {
|
1120
|
-
switch (ret) {
|
1121
|
-
case BZ_IO_ERROR:
|
1122
|
-
if (ferror(stdin))
|
1123
|
-
FRT_RAISE(FRT_IO_ERROR, "bzlib: error reading stdin");
|
1124
|
-
if (ferror(stdout))
|
1125
|
-
FRT_RAISE(FRT_IO_ERROR, "bzlib: error writing stdout");
|
1126
|
-
break;
|
1127
|
-
case BZ_CONFIG_ERROR:
|
1128
|
-
FRT_RAISE(FRT_IO_ERROR, "bzlib: system configuration error");
|
1129
|
-
break;
|
1130
|
-
case BZ_SEQUENCE_ERROR: /* shouldn't occur if code is correct */
|
1131
|
-
FRT_RAISE(FRT_IO_ERROR, "bzlib: !!BUG!! sequence error");
|
1132
|
-
break;
|
1133
|
-
case BZ_PARAM_ERROR: /* shouldn't occur if code is correct */
|
1134
|
-
FRT_RAISE(FRT_IO_ERROR, "bzlib: !!BUG!! parameter error");
|
1135
|
-
break;
|
1136
|
-
case BZ_MEM_ERROR:
|
1137
|
-
FRT_RAISE(FRT_IO_ERROR, "bzlib: memory error");
|
1138
|
-
break;
|
1139
|
-
case BZ_DATA_ERROR:
|
1140
|
-
FRT_RAISE(FRT_IO_ERROR, "bzlib: data integrity check error");
|
1141
|
-
break;
|
1142
|
-
case BZ_DATA_ERROR_MAGIC:
|
1143
|
-
FRT_RAISE(FRT_IO_ERROR, "bzlib: data integrity check - non-matching magic");
|
1144
|
-
break;
|
1145
|
-
case BZ_UNEXPECTED_EOF:
|
1146
|
-
FRT_RAISE(FRT_IO_ERROR, "bzlib: unexpected end-of-file");
|
1147
|
-
break;
|
1148
|
-
case BZ_OUTBUFF_FULL:
|
1149
|
-
FRT_RAISE(FRT_IO_ERROR, "bzlib: output buffer full");
|
1150
|
-
break;
|
1151
|
-
default:
|
1152
|
-
FRT_RAISE(FRT_EXCEPTION, "bzlib: unknown error");
|
1153
|
-
}
|
1154
|
-
}
|
1155
|
-
|
1156
|
-
static char *is_read_bz2_compressed_bytes(FrtInStream *is, int compressed_len, int *len) {
|
1157
|
-
int buf_out_idx = 0, ret, read_len;
|
1158
|
-
char *buf_out = NULL;
|
1159
|
-
char buf_in[FRT_COMPRESSION_BUFFER_SIZE];
|
1160
|
-
bz_stream zstrm;
|
1161
|
-
zstrm.bzalloc = NULL;
|
1162
|
-
zstrm.bzfree = NULL;
|
1163
|
-
zstrm.opaque = NULL;
|
1164
|
-
zstrm.next_in = NULL;
|
1165
|
-
zstrm.avail_in = 0;
|
1166
|
-
if ((ret = BZ2_bzDecompressInit(&zstrm, 0, 0)) != BZ_OK) zraise(ret);
|
1167
|
-
|
1168
|
-
do {
|
1169
|
-
read_len = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
|
1170
|
-
frt_is_read_bytes(is, (frt_uchar *)buf_in, read_len);
|
1171
|
-
compressed_len -= read_len;
|
1172
|
-
zstrm.avail_in = read_len;
|
1173
|
-
zstrm.next_in = buf_in;
|
1174
|
-
zstrm.avail_out = FRT_COMPRESSION_BUFFER_SIZE;
|
1175
|
-
|
1176
|
-
do {
|
1177
|
-
REALLOC_N(buf_out, char, buf_out_idx + FRT_COMPRESSION_BUFFER_SIZE);
|
1178
|
-
zstrm.next_out = buf_out + buf_out_idx;
|
1179
|
-
ret = BZ2_bzDecompress(&zstrm);
|
1180
|
-
assert(ret != BZ_SEQUENCE_ERROR); /* state not clobbered */
|
1181
|
-
if (ret != BZ_OK && ret != BZ_STREAM_END) {
|
1182
|
-
(void)BZ2_bzDecompressEnd(&zstrm);
|
1183
|
-
zraise(ret);
|
1184
|
-
}
|
1185
|
-
buf_out_idx += FRT_COMPRESSION_BUFFER_SIZE - zstrm.avail_out;
|
1186
|
-
} while (zstrm.avail_out == 0);
|
1187
|
-
} while (ret != BZ_STREAM_END && compressed_len != 0);
|
1188
|
-
|
1189
|
-
(void)BZ2_bzDecompressEnd(&zstrm);
|
1190
|
-
|
1191
|
-
FRT_REALLOC_N(buf_out, char, buf_out_idx + 1);
|
1192
|
-
buf_out[buf_out_idx] = '\0';
|
1193
|
-
|
1194
|
-
*len = buf_out_idx;
|
1195
|
-
return (char *)buf_out;
|
1196
|
-
}
|
1197
|
-
|
1198
|
-
static char *is_read_lz4_compressed_bytes(FrtInStream *is, int compressed_len, int *length) {
|
1199
|
-
frt_uchar buf_in[FRT_COMPRESSION_BUFFER_SIZE];
|
1200
|
-
char *buf_out = NULL;
|
1201
|
-
int dc_length = 0;
|
1202
|
-
LZ4F_dctx *dctx;
|
1203
|
-
LZ4F_frameInfo_t frame_info;
|
1204
|
-
LZ4F_errorCode_t dctx_status = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION);
|
1205
|
-
if (LZ4F_isError(dctx_status)) { *length = -1; return NULL; }
|
1206
|
-
|
1207
|
-
/* header and buffer */
|
1208
|
-
int read_length = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
|
1209
|
-
frt_is_read_bytes(is, buf_in, read_length);
|
1210
|
-
compressed_len -= read_length;
|
1211
|
-
|
1212
|
-
size_t consumed_size = read_length;
|
1213
|
-
size_t res = LZ4F_getFrameInfo(dctx, &frame_info, buf_in, &consumed_size);
|
1214
|
-
if (LZ4F_isError(res)) { *length = -1; return NULL; }
|
1215
|
-
size_t buf_out_length;
|
1216
|
-
switch(frame_info.blockSizeID) {
|
1217
|
-
case LZ4F_default:
|
1218
|
-
case LZ4F_max64KB:
|
1219
|
-
buf_out_length = 1 << 16;
|
1220
|
-
break;
|
1221
|
-
case LZ4F_max256KB:
|
1222
|
-
buf_out_length = 1 << 18;
|
1223
|
-
break;
|
1224
|
-
case LZ4F_max1MB:
|
1225
|
-
buf_out_length = 1 << 20;
|
1226
|
-
break;
|
1227
|
-
case LZ4F_max4MB:
|
1228
|
-
buf_out_length = 1 << 22;
|
1229
|
-
break;
|
1230
|
-
default:
|
1231
|
-
buf_out_length = 0;
|
1232
|
-
}
|
1233
|
-
|
1234
|
-
res = 1;
|
1235
|
-
int first_chunk = 1;
|
1236
|
-
|
1237
|
-
/* decompress data */
|
1238
|
-
while (res != 0) {
|
1239
|
-
if (!first_chunk) {
|
1240
|
-
read_length = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
|
1241
|
-
frt_is_read_bytes(is, buf_in, read_length);
|
1242
|
-
compressed_len -= read_length;
|
1243
|
-
consumed_size = 0;
|
1244
|
-
}
|
1245
|
-
first_chunk = 0;
|
1246
|
-
|
1247
|
-
char *src = (char *)(buf_in + consumed_size);
|
1248
|
-
char *src_end = (char *)buf_in + read_length;
|
1249
|
-
|
1250
|
-
while (src < src_end && res != 0){
|
1251
|
-
size_t dest_length = buf_out_length;
|
1252
|
-
size_t consumed_size = read_length;
|
1253
|
-
FRT_REALLOC_N(buf_out, char, dc_length + buf_out_length);
|
1254
|
-
res = LZ4F_decompress(dctx, buf_out + dc_length, &dest_length, src, &consumed_size, NULL);
|
1255
|
-
if (LZ4F_isError(res)) { *length = -1; return NULL; }
|
1256
|
-
dc_length += dest_length;
|
1257
|
-
src = src + consumed_size;
|
1258
|
-
}
|
1259
|
-
}
|
1260
|
-
|
1261
|
-
/* finish up */
|
1262
|
-
LZ4F_freeDecompressionContext(dctx);
|
1263
|
-
|
1264
|
-
FRT_REALLOC_N(buf_out, char, dc_length + 1);
|
1265
|
-
buf_out[dc_length] = '\0';
|
1266
|
-
|
1267
|
-
*length = dc_length;
|
1268
|
-
return buf_out;
|
1269
|
-
}
|
1270
|
-
|
1271
|
-
static char *is_read_compressed_bytes(FrtInStream *is, int compressed_len, int *len, FrtCompressionType compression) {
|
1272
|
-
switch (compression) {
|
1273
|
-
case FRT_COMPRESSION_BROTLI:
|
1274
|
-
return is_read_brotli_compressed_bytes(is, compressed_len, len);
|
1275
|
-
case FRT_COMPRESSION_BZ2:
|
1276
|
-
return is_read_bz2_compressed_bytes(is, compressed_len, len);
|
1277
|
-
case FRT_COMPRESSION_LZ4:
|
1278
|
-
return is_read_lz4_compressed_bytes(is, compressed_len, len);
|
1279
|
-
default:
|
1280
|
-
return NULL;
|
1281
|
-
}
|
1282
|
-
}
|
1283
|
-
|
1284
|
-
char *frt_lazy_df_get_data(FrtLazyDocField *self, int i) {
|
1285
|
-
char *text = NULL;
|
1286
|
-
if (i < self->size && i >= 0) {
|
1287
|
-
text = self->data[i].text;
|
1288
|
-
if (NULL == text) {
|
1289
|
-
const int read_len = self->data[i].length + 1;
|
1290
|
-
frt_is_seek(self->doc->fields_in, self->data[i].start);
|
1291
|
-
if (self->data[i].compression != FRT_COMPRESSION_NONE) {
|
1292
|
-
self->data[i].text = text = is_read_compressed_bytes(self->doc->fields_in, read_len, &(self->data[i].length), self->data[i].compression);
|
1293
|
-
} else {
|
1294
|
-
self->data[i].text = text = FRT_ALLOC_N(char, read_len);
|
1295
|
-
frt_is_read_bytes(self->doc->fields_in, (frt_uchar *)text, read_len);
|
1296
|
-
text[read_len - 1] = '\0';
|
1297
|
-
}
|
1298
|
-
self->loaded = true;
|
1299
|
-
}
|
1300
|
-
}
|
1301
|
-
|
1302
|
-
return text;
|
1303
|
-
}
|
1304
|
-
|
1305
|
-
void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf, int start, int len) {
|
1306
|
-
if (self->compression != FRT_COMPRESSION_NONE && !self->decompressed) {
|
1307
|
-
int i;
|
1308
|
-
self->len = 0;
|
1309
|
-
for (i = self->size-1; i >= 0; i--) {
|
1310
|
-
(void)frt_lazy_df_get_data(self, i);
|
1311
|
-
self->len += self->data[i].length + 1;
|
1312
|
-
}
|
1313
|
-
self->len--; /* each field separated by ' ' but no need to add to end */
|
1314
|
-
self->decompressed = true;
|
1315
|
-
}
|
1316
|
-
if (start < 0 || start >= self->len) {
|
1317
|
-
FRT_RAISE(FRT_IO_ERROR, "start out of range in LazyDocField#get_bytes. %d "
|
1318
|
-
"is not between 0 and %d", start, self->len);
|
1319
|
-
}
|
1320
|
-
if (len <= 0) {
|
1321
|
-
FRT_RAISE(FRT_IO_ERROR, "len = %d, but should be greater than 0", len);
|
1322
|
-
}
|
1323
|
-
if (start + len > self->len) {
|
1324
|
-
FRT_RAISE(FRT_IO_ERROR, "Tried to read past end of field. Field is only %d "
|
1325
|
-
"bytes long but tried to read to %d", self->len, start + len);
|
1326
|
-
}
|
1327
|
-
if (self->compression != FRT_COMPRESSION_NONE) {
|
1328
|
-
int cur_start = 0, buf_start = 0, cur_end, i, copy_start, copy_len;
|
1329
|
-
for (i = 0; i < self->size; i++) {
|
1330
|
-
cur_end = cur_start + self->data[i].length;
|
1331
|
-
if (start < cur_end) {
|
1332
|
-
copy_start = start > cur_start ? start - cur_start : 0;
|
1333
|
-
copy_len = cur_end - cur_start - copy_start;
|
1334
|
-
if (copy_len >= len) {
|
1335
|
-
copy_len = len;
|
1336
|
-
len = 0;
|
1337
|
-
}
|
1338
|
-
else {
|
1339
|
-
len -= copy_len;
|
1340
|
-
}
|
1341
|
-
memcpy(buf + buf_start,
|
1342
|
-
self->data[i].text + copy_start,
|
1343
|
-
copy_len);
|
1344
|
-
buf_start += copy_len;
|
1345
|
-
if (len > 0) {
|
1346
|
-
buf[buf_start++] = ' ';
|
1347
|
-
len--;
|
1348
|
-
}
|
1349
|
-
if (len == 0) break;
|
1350
|
-
}
|
1351
|
-
cur_start = cur_end + 1;
|
1352
|
-
}
|
1353
|
-
} else {
|
1354
|
-
frt_is_seek(self->doc->fields_in, self->data[0].start + start);
|
1355
|
-
frt_is_read_bytes(self->doc->fields_in, (frt_uchar *)buf, len);
|
1356
|
-
}
|
1357
|
-
}
|
1358
|
-
|
1359
|
-
/****************************************************************************
|
1360
|
-
*
|
1361
|
-
* LazyDoc
|
1362
|
-
*
|
1363
|
-
****************************************************************************/
|
1364
|
-
|
1365
|
-
static FrtLazyDoc *lazy_doc_new(int size, FrtInStream *fdt_in)
|
1366
|
-
{
|
1367
|
-
FrtLazyDoc *self = FRT_ALLOC(FrtLazyDoc);
|
1368
|
-
self->field_dictionary = frt_h_new_ptr((frt_free_ft)&lazy_df_destroy);
|
1369
|
-
self->size = size;
|
1370
|
-
self->fields = FRT_ALLOC_AND_ZERO_N(FrtLazyDocField *, size);
|
1371
|
-
self->fields_in = frt_is_clone(fdt_in);
|
1372
|
-
self->loaded = false;
|
1373
|
-
return self;
|
1374
|
-
}
|
1375
|
-
|
1376
|
-
void frt_lazy_doc_close(FrtLazyDoc *self)
|
1377
|
-
{
|
1378
|
-
frt_h_destroy(self->field_dictionary);
|
1379
|
-
frt_is_close(self->fields_in);
|
1380
|
-
free(self->fields);
|
1381
|
-
free(self);
|
1382
|
-
}
|
1383
|
-
|
1384
|
-
static void lazy_doc_add_field(FrtLazyDoc *self, FrtLazyDocField *lazy_df, int i)
|
1385
|
-
{
|
1386
|
-
self->fields[i] = lazy_df;
|
1387
|
-
|
1388
|
-
frt_h_set(self->field_dictionary, (void *)lazy_df->name, lazy_df);
|
1389
|
-
lazy_df->doc = self;
|
1390
|
-
}
|
1391
|
-
|
1392
|
-
FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self, ID field) {
|
1393
|
-
return (FrtLazyDocField *)frt_h_get(self->field_dictionary, (void *)field);
|
1394
|
-
}
|
1395
|
-
|
1396
694
|
/****************************************************************************
|
1397
695
|
* FrtFieldsReader
|
1398
696
|
****************************************************************************/
|
@@ -1409,9 +707,9 @@ FrtFieldsReader *frt_fr_open(FrtStore *store, const char *segment, FrtFieldInfos
|
|
1409
707
|
fr->fis = fis;
|
1410
708
|
|
1411
709
|
strcpy(file_name + segment_len, ".fdt");
|
1412
|
-
fr->fdt_in = store->open_input(store, file_name);
|
710
|
+
fr->fdt_in = store->open_input(store, segm_idx_name, file_name);
|
1413
711
|
strcpy(file_name + segment_len, ".fdx");
|
1414
|
-
fr->fdx_in = store->open_input(store, file_name);
|
712
|
+
fr->fdx_in = store->open_input(store, segm_idx_name, file_name);
|
1415
713
|
fr->size = frt_is_length(fr->fdx_in) / FIELDS_IDX_PTR_SIZE;
|
1416
714
|
fr->store = store;
|
1417
715
|
FRT_REF(store);
|
@@ -1437,32 +735,30 @@ void frt_fr_close(FrtFieldsReader *fr) {
|
|
1437
735
|
free(fr);
|
1438
736
|
}
|
1439
737
|
|
1440
|
-
static FrtDocField *frt_fr_df_new(ID name, int size, FrtCompressionType
|
738
|
+
static FrtDocField *frt_fr_df_new(ID name, int size, FrtCompressionType compression_type) {
|
1441
739
|
FrtDocField *df = FRT_ALLOC(FrtDocField);
|
1442
740
|
df->name = name;
|
1443
741
|
df->capa = df->size = size;
|
1444
|
-
df->data = FRT_ALLOC_N(char *, df->capa);
|
742
|
+
df->data = FRT_ALLOC_N(const char *, df->capa);
|
1445
743
|
df->lengths = FRT_ALLOC_N(int, df->capa);
|
1446
744
|
df->encodings = FRT_ALLOC_N(rb_encoding *, df->capa);
|
1447
|
-
df->destroy_data = true;
|
1448
745
|
df->boost = 1.0f;
|
1449
|
-
df->
|
746
|
+
df->compression_type = compression_type;
|
1450
747
|
return df;
|
1451
748
|
}
|
1452
749
|
|
1453
|
-
static void frt_fr_read_compressed_fields(FrtFieldsReader *fr, FrtDocField *df, FrtCompressionType
|
750
|
+
static void frt_fr_read_compressed_fields(FrtFieldsReader *fr, FrtDocField *df, FrtCompressionType compression_type) {
|
1454
751
|
int i;
|
1455
752
|
const int df_size = df->size;
|
1456
753
|
FrtInStream *fdt_in = fr->fdt_in;
|
1457
754
|
|
1458
755
|
for (i = 0; i < df_size; i++) {
|
1459
|
-
const int compressed_len = df->lengths[i]
|
1460
|
-
df->data[i] =
|
756
|
+
const int compressed_len = df->lengths[i];
|
757
|
+
df->data[i] = frt_is_read_compressed_bytes(fdt_in, compressed_len, &(df->lengths[i]), compression_type);
|
1461
758
|
}
|
1462
759
|
}
|
1463
760
|
|
1464
|
-
FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
|
1465
|
-
{
|
761
|
+
FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num) {
|
1466
762
|
int i, j;
|
1467
763
|
frt_off_t pos;
|
1468
764
|
int stored_cnt;
|
@@ -1478,28 +774,29 @@ FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
|
|
1478
774
|
for (i = 0; i < stored_cnt; i++) {
|
1479
775
|
const int field_num = frt_is_read_vint(fdt_in);
|
1480
776
|
FrtFieldInfo *fi = fr->fis->fields[field_num];
|
1481
|
-
const int
|
1482
|
-
FrtDocField *df = frt_fr_df_new(fi->name,
|
777
|
+
const int df_field_count = frt_is_read_vint(fdt_in);
|
778
|
+
FrtDocField *df = frt_fr_df_new(fi->name, df_field_count, bits_get_compression_type(fi->bits));
|
1483
779
|
|
1484
|
-
for (j = 0; j <
|
780
|
+
for (j = 0; j < df_field_count; j++) {
|
1485
781
|
df->lengths[j] = frt_is_read_vint(fdt_in);
|
1486
782
|
df->encodings[j] = rb_enc_from_index(frt_is_read_vint(fdt_in));
|
1487
|
-
df->
|
783
|
+
df->compression_type = frt_is_read_vint(fdt_in);
|
1488
784
|
}
|
1489
785
|
|
1490
786
|
frt_doc_add_field(doc, df);
|
1491
787
|
}
|
1492
788
|
for (i = 0; i < stored_cnt; i++) {
|
1493
789
|
FrtDocField *df = doc->fields[i];
|
1494
|
-
if (df->
|
1495
|
-
frt_fr_read_compressed_fields(fr, df, df->
|
790
|
+
if (df->compression_type != FRT_COMPRESSION_NONE) {
|
791
|
+
frt_fr_read_compressed_fields(fr, df, df->compression_type);
|
1496
792
|
} else {
|
1497
793
|
const int df_size = df->size;
|
1498
794
|
for (j = 0; j < df_size; j++) {
|
1499
|
-
const int read_len = df->lengths[j]
|
1500
|
-
|
1501
|
-
frt_is_read_bytes(fdt_in, (frt_uchar *)
|
1502
|
-
|
795
|
+
const int read_len = df->lengths[j];
|
796
|
+
char *d = FRT_ALLOC_N(char, read_len + 1);
|
797
|
+
frt_is_read_bytes(fdt_in, (frt_uchar *)d, read_len);
|
798
|
+
d[read_len] = '\0';
|
799
|
+
df->data[j] = d;
|
1503
800
|
}
|
1504
801
|
}
|
1505
802
|
}
|
@@ -1507,8 +804,7 @@ FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
|
|
1507
804
|
return doc;
|
1508
805
|
}
|
1509
806
|
|
1510
|
-
FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num)
|
1511
|
-
{
|
807
|
+
FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num) {
|
1512
808
|
int start = 0;
|
1513
809
|
int i, j;
|
1514
810
|
frt_off_t pos;
|
@@ -1522,23 +818,23 @@ FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num)
|
|
1522
818
|
frt_is_seek(fdt_in, pos);
|
1523
819
|
stored_cnt = frt_is_read_vint(fdt_in);
|
1524
820
|
|
1525
|
-
lazy_doc =
|
821
|
+
lazy_doc = frt_lazy_doc_new(stored_cnt, fdt_in);
|
1526
822
|
for (i = 0; i < stored_cnt; i++) {
|
1527
823
|
FrtFieldInfo *fi = fr->fis->fields[frt_is_read_vint(fdt_in)];
|
1528
824
|
const int df_size = frt_is_read_vint(fdt_in);
|
1529
|
-
FrtLazyDocField *lazy_df =
|
825
|
+
FrtLazyDocField *lazy_df = frt_lazy_df_new(fi->name, df_size, bits_get_compression_type(fi->bits));
|
1530
826
|
const int field_start = start;
|
1531
827
|
/* get the starts relative positions this time around */
|
1532
828
|
|
1533
829
|
for (j = 0; j < df_size; j++) {
|
1534
830
|
lazy_df->data[j].start = start;
|
1535
|
-
start +=
|
831
|
+
start += (lazy_df->data[j].length = frt_is_read_vint(fdt_in));
|
1536
832
|
lazy_df->data[j].encoding = rb_enc_from_index(frt_is_read_vint(fdt_in));
|
1537
|
-
lazy_df->data[j].
|
833
|
+
lazy_df->data[j].compression_type = frt_is_read_vint(fdt_in);
|
1538
834
|
}
|
1539
835
|
|
1540
|
-
lazy_df->len = start - field_start
|
1541
|
-
|
836
|
+
lazy_df->len = start - field_start;
|
837
|
+
frt_lazy_doc_add_field(lazy_doc, lazy_df, i);
|
1542
838
|
}
|
1543
839
|
/* correct the starts to their correct absolute positions */
|
1544
840
|
const frt_off_t abs_start = frt_is_pos(fdt_in);
|
@@ -1553,8 +849,7 @@ FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num)
|
|
1553
849
|
return lazy_doc;
|
1554
850
|
}
|
1555
851
|
|
1556
|
-
static FrtTermVector *frt_fr_read_term_vector(FrtFieldsReader *fr, int field_num)
|
1557
|
-
{
|
852
|
+
static FrtTermVector *frt_fr_read_term_vector(FrtFieldsReader *fr, int field_num) {
|
1558
853
|
FrtTermVector *tv = FRT_ALLOC_AND_ZERO(FrtTermVector);
|
1559
854
|
FrtInStream *fdt_in = fr->fdt_in;
|
1560
855
|
FrtFieldInfo *fi = fr->fis->fields[field_num];
|
@@ -1612,8 +907,7 @@ static FrtTermVector *frt_fr_read_term_vector(FrtFieldsReader *fr, int field_num
|
|
1612
907
|
return tv;
|
1613
908
|
}
|
1614
909
|
|
1615
|
-
FrtHash *frt_fr_get_tv(FrtFieldsReader *fr, int doc_num)
|
1616
|
-
{
|
910
|
+
FrtHash *frt_fr_get_tv(FrtFieldsReader *fr, int doc_num) {
|
1617
911
|
FrtHash *term_vectors = frt_h_new_ptr((frt_free_ft)&frt_tv_destroy);
|
1618
912
|
int i;
|
1619
913
|
FrtInStream *fdx_in = fr->fdx_in;
|
@@ -1699,10 +993,10 @@ FrtFieldsWriter *frt_fw_open(FrtStore *store, const char *segment, FrtFieldInfos
|
|
1699
993
|
memcpy(file_name, segment, segment_len);
|
1700
994
|
|
1701
995
|
strcpy(file_name + segment_len, ".fdt");
|
1702
|
-
fw->fdt_out = store->new_output(store, file_name);
|
996
|
+
fw->fdt_out = store->new_output(store, segm_idx_name, file_name);
|
1703
997
|
|
1704
998
|
strcpy(file_name + segment_len, ".fdx");
|
1705
|
-
fw->fdx_out = store->new_output(store, file_name);
|
999
|
+
fw->fdx_out = store->new_output(store, segm_idx_name, file_name);
|
1706
1000
|
|
1707
1001
|
fw->buffer = frt_ram_new_buffer();
|
1708
1002
|
|
@@ -1720,152 +1014,13 @@ void frt_fw_close(FrtFieldsWriter *fw) {
|
|
1720
1014
|
free(fw);
|
1721
1015
|
}
|
1722
1016
|
|
1723
|
-
static int frt_os_write_brotli_compressed_bytes(FrtOutStream* out_stream, frt_uchar *data, int length) {
|
1724
|
-
size_t compressed_length = 0;
|
1725
|
-
const frt_uchar *next_in = data;
|
1726
|
-
size_t available_in = length;
|
1727
|
-
size_t available_out;
|
1728
|
-
frt_uchar compression_buffer[FRT_COMPRESSION_BUFFER_SIZE];
|
1729
|
-
frt_uchar *next_out;
|
1730
|
-
BrotliEncoderState *b_state = BrotliEncoderCreateInstance(NULL, NULL, NULL);
|
1731
|
-
if (!b_state) { comp_raise(); return -1; }
|
1732
|
-
|
1733
|
-
BrotliEncoderSetParameter(b_state, BROTLI_PARAM_QUALITY, FRT_BROTLI_COMPRESSION_LEVEL);
|
1734
|
-
|
1735
|
-
do {
|
1736
|
-
available_out = FRT_COMPRESSION_BUFFER_SIZE;
|
1737
|
-
next_out = compression_buffer;
|
1738
|
-
if (!BrotliEncoderCompressStream(b_state, BROTLI_OPERATION_FINISH,
|
1739
|
-
&available_in, &next_in,
|
1740
|
-
&available_out, &next_out, &compressed_length)) {
|
1741
|
-
BrotliEncoderDestroyInstance(b_state);
|
1742
|
-
comp_raise();
|
1743
|
-
return -1;
|
1744
|
-
}
|
1745
|
-
frt_os_write_bytes(out_stream, compression_buffer, FRT_COMPRESSION_BUFFER_SIZE - available_out);
|
1746
|
-
} while (!BrotliEncoderIsFinished(b_state));
|
1747
|
-
|
1748
|
-
BrotliEncoderDestroyInstance(b_state);
|
1749
|
-
|
1750
|
-
return (int)compressed_length;
|
1751
|
-
}
|
1752
|
-
|
1753
|
-
static int frt_os_write_bz2_compressed_bytes(FrtOutStream* out_stream, frt_uchar *data, int length) {
|
1754
|
-
int ret, buf_size, compressed_len = 0;
|
1755
|
-
char out_buffer[FRT_COMPRESSION_BUFFER_SIZE];
|
1756
|
-
bz_stream zstrm;
|
1757
|
-
zstrm.bzalloc = NULL;
|
1758
|
-
zstrm.bzfree = NULL;
|
1759
|
-
zstrm.opaque = NULL;
|
1760
|
-
if ((ret = BZ2_bzCompressInit(&zstrm, FRT_BZIP_COMPRESSION_LEVEL, 0, 0)) != BZ_OK) zraise(ret);
|
1761
|
-
|
1762
|
-
zstrm.avail_in = length;
|
1763
|
-
zstrm.next_in = (char *)data;
|
1764
|
-
zstrm.avail_out = FRT_COMPRESSION_BUFFER_SIZE;
|
1765
|
-
zstrm.next_out = out_buffer;
|
1766
|
-
|
1767
|
-
do {
|
1768
|
-
ret = BZ2_bzCompress(&zstrm, BZ_FINISH); /* no bad return value */
|
1769
|
-
assert(ret != BZ_SEQUENCE_ERROR); /* state not clobbered */
|
1770
|
-
compressed_len += buf_size = FRT_COMPRESSION_BUFFER_SIZE - zstrm.avail_out;
|
1771
|
-
frt_os_write_bytes(out_stream, (frt_uchar *)out_buffer, buf_size);
|
1772
|
-
} while (zstrm.avail_out == 0);
|
1773
|
-
assert(zstrm.avail_in == 0); /* all input will be used */
|
1774
|
-
|
1775
|
-
(void)BZ2_bzCompressEnd(&zstrm);
|
1776
|
-
return compressed_len;
|
1777
|
-
}
|
1778
|
-
|
1779
|
-
static const LZ4F_preferences_t lz4_prefs = {
|
1780
|
-
{
|
1781
|
-
LZ4F_default,
|
1782
|
-
LZ4F_blockLinked,
|
1783
|
-
LZ4F_noContentChecksum,
|
1784
|
-
LZ4F_frame,
|
1785
|
-
0, /* unknown content size */
|
1786
|
-
0, /* no dictID */
|
1787
|
-
LZ4F_noBlockChecksum
|
1788
|
-
},
|
1789
|
-
0,
|
1790
|
-
1,
|
1791
|
-
1,
|
1792
|
-
{0,0,0}
|
1793
|
-
};
|
1794
|
-
|
1795
|
-
static int frt_os_write_lz4_compressed_bytes(FrtOutStream* out_stream, frt_uchar *data, int length) {
|
1796
|
-
int compressed_length = 0;
|
1797
|
-
int remaining_length = length;
|
1798
|
-
size_t ccmp_length = 0;
|
1799
|
-
LZ4F_compressionContext_t ctx;
|
1800
|
-
size_t out_buf_length = LZ4F_compressBound(FRT_COMPRESSION_BUFFER_SIZE, &lz4_prefs);
|
1801
|
-
frt_uchar *out_buf = frt_ecalloc(out_buf_length);
|
1802
|
-
|
1803
|
-
size_t ctx_creation = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
|
1804
|
-
if (LZ4F_isError(ctx_creation)) {
|
1805
|
-
compressed_length = -1;
|
1806
|
-
goto finish;
|
1807
|
-
}
|
1808
|
-
|
1809
|
-
/* create header */
|
1810
|
-
ccmp_length = LZ4F_compressBegin(ctx, out_buf, out_buf_length, &lz4_prefs);
|
1811
|
-
if (LZ4F_isError(ccmp_length)) {
|
1812
|
-
compressed_length = -1;
|
1813
|
-
goto finish;
|
1814
|
-
}
|
1815
|
-
compressed_length = ccmp_length;
|
1816
|
-
frt_os_write_bytes(out_stream, out_buf, ccmp_length);
|
1817
|
-
|
1818
|
-
/* compress data */
|
1819
|
-
do {
|
1820
|
-
int read_length = (FRT_COMPRESSION_BUFFER_SIZE > remaining_length) ? remaining_length : FRT_COMPRESSION_BUFFER_SIZE;
|
1821
|
-
ccmp_length = LZ4F_compressUpdate(ctx, out_buf, out_buf_length, data + (length - remaining_length), read_length, NULL);
|
1822
|
-
if (LZ4F_isError(ccmp_length)) {
|
1823
|
-
compressed_length = -1;
|
1824
|
-
goto finish;
|
1825
|
-
}
|
1826
|
-
frt_os_write_bytes(out_stream, out_buf, ccmp_length);
|
1827
|
-
compressed_length += ccmp_length;
|
1828
|
-
remaining_length -= read_length;
|
1829
|
-
} while (remaining_length > 0);
|
1830
|
-
|
1831
|
-
/* finish up */
|
1832
|
-
ccmp_length = LZ4F_compressEnd(ctx, out_buf, out_buf_length, NULL);
|
1833
|
-
if (LZ4F_isError(ccmp_length)) {
|
1834
|
-
compressed_length = -1;
|
1835
|
-
goto finish;
|
1836
|
-
}
|
1837
|
-
|
1838
|
-
frt_os_write_bytes(out_stream, out_buf, ccmp_length);
|
1839
|
-
compressed_length += ccmp_length;
|
1840
|
-
|
1841
|
-
finish:
|
1842
|
-
LZ4F_freeCompressionContext(ctx);
|
1843
|
-
free(out_buf);
|
1844
|
-
|
1845
|
-
return compressed_length;
|
1846
|
-
}
|
1847
|
-
|
1848
|
-
static int frt_os_write_compressed_bytes(FrtOutStream* out_stream, frt_uchar *data, int length, FrtCompressionType compression) {
|
1849
|
-
switch (compression) {
|
1850
|
-
case FRT_COMPRESSION_BROTLI:
|
1851
|
-
return frt_os_write_brotli_compressed_bytes(out_stream, data, length);
|
1852
|
-
case FRT_COMPRESSION_BZ2:
|
1853
|
-
return frt_os_write_bz2_compressed_bytes(out_stream, data, length);
|
1854
|
-
case FRT_COMPRESSION_LZ4:
|
1855
|
-
return frt_os_write_lz4_compressed_bytes(out_stream, data, length);
|
1856
|
-
default:
|
1857
|
-
return -1;
|
1858
|
-
}
|
1859
|
-
|
1860
|
-
}
|
1861
|
-
|
1862
1017
|
void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc) {
|
1863
1018
|
int i, j, stored_cnt = 0;
|
1864
1019
|
FrtDocField *df;
|
1865
1020
|
FrtFieldInfo *fi;
|
1866
|
-
FrtCompressionType
|
1021
|
+
FrtCompressionType compression_type;
|
1867
1022
|
FrtOutStream *fdt_out = fw->fdt_out, *fdx_out = fw->fdx_out;
|
1868
|
-
const int doc_size = doc->
|
1023
|
+
const int doc_size = doc->field_count;
|
1869
1024
|
|
1870
1025
|
for (i = 0; i < doc_size; i++) {
|
1871
1026
|
df = doc->fields[i];
|
@@ -1889,23 +1044,20 @@ void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc) {
|
|
1889
1044
|
frt_os_write_vint(fdt_out, df_size);
|
1890
1045
|
|
1891
1046
|
if (bits_is_compressed(fi->bits)) {
|
1892
|
-
|
1047
|
+
compression_type = bits_get_compression_type(fi->bits);
|
1893
1048
|
for (j = 0; j < df_size; j++) {
|
1894
|
-
|
1895
|
-
|
1896
|
-
frt_os_write_vint(fdt_out, compressed_len - 1);
|
1049
|
+
int compressed_len = frt_os_write_compressed_bytes(fw->buffer, (frt_uchar*)df->data[j], df->lengths[j], compression_type);
|
1050
|
+
frt_os_write_vint(fdt_out, compressed_len);
|
1897
1051
|
frt_os_write_vint(fdt_out, rb_enc_to_index(df->encodings[j]));
|
1898
|
-
frt_os_write_vint(fdt_out,
|
1052
|
+
frt_os_write_vint(fdt_out, compression_type);
|
1899
1053
|
}
|
1900
1054
|
} else {
|
1901
1055
|
for (j = 0; j < df_size; j++) {
|
1902
1056
|
const int length = df->lengths[j];
|
1057
|
+
frt_os_write_bytes(fw->buffer, (frt_uchar*)df->data[j], length);
|
1903
1058
|
frt_os_write_vint(fdt_out, length);
|
1904
1059
|
frt_os_write_vint(fdt_out, rb_enc_to_index(df->encodings[j]));
|
1905
|
-
frt_os_write_vint(fdt_out, FRT_COMPRESSION_NONE);
|
1906
|
-
frt_os_write_bytes(fw->buffer, (frt_uchar*)df->data[j], length);
|
1907
|
-
/* leave a space between fields as that is how they are analyzed */
|
1908
|
-
frt_os_write_byte(fw->buffer, ' ');
|
1060
|
+
frt_os_write_vint(fdt_out, FRT_COMPRESSION_NONE);
|
1909
1061
|
}
|
1910
1062
|
}
|
1911
1063
|
}
|
@@ -1932,8 +1084,7 @@ void frt_fw_add_postings(FrtFieldsWriter *fw,
|
|
1932
1084
|
FrtPostingList **plists,
|
1933
1085
|
int posting_count,
|
1934
1086
|
FrtOffset *offsets,
|
1935
|
-
int offset_count)
|
1936
|
-
{
|
1087
|
+
int offset_count) {
|
1937
1088
|
int i, delta_start, delta_length;
|
1938
1089
|
const char *last_term = FRT_EMPTY_STRING;
|
1939
1090
|
FrtOutStream *fdt_out = fw->fdt_out;
|
@@ -1997,19 +1148,16 @@ void frt_fw_add_postings(FrtFieldsWriter *fw,
|
|
1997
1148
|
|
1998
1149
|
#define TE(ste) ((FrtTermEnum *)ste)
|
1999
1150
|
|
2000
|
-
char *frt_te_get_term(FrtTermEnum *te)
|
2001
|
-
{
|
1151
|
+
char *frt_te_get_term(FrtTermEnum *te) {
|
2002
1152
|
return (char *)memcpy(FRT_ALLOC_N(char, te->curr_term_len + 1),
|
2003
1153
|
te->curr_term, te->curr_term_len + 1);
|
2004
1154
|
}
|
2005
1155
|
|
2006
|
-
FrtTermInfo *frt_te_get_ti(FrtTermEnum *te)
|
2007
|
-
{
|
1156
|
+
FrtTermInfo *frt_te_get_ti(FrtTermEnum *te) {
|
2008
1157
|
return (FrtTermInfo*)memcpy(FRT_ALLOC(FrtTermInfo), &(te->curr_ti), sizeof(FrtTermInfo));
|
2009
1158
|
}
|
2010
1159
|
|
2011
|
-
static char *te_skip_to(FrtTermEnum *te, const char *term)
|
2012
|
-
{
|
1160
|
+
static char *te_skip_to(FrtTermEnum *te, const char *term) {
|
2013
1161
|
char *curr_term = te->curr_term;
|
2014
1162
|
if (strcmp(curr_term, term) < 0) {
|
2015
1163
|
while (NULL != ((curr_term = te->next(te)))
|
@@ -2031,8 +1179,7 @@ static char *te_skip_to(FrtTermEnum *te, const char *term)
|
|
2031
1179
|
* SegmentTermIndex
|
2032
1180
|
****************************************************************************/
|
2033
1181
|
|
2034
|
-
static void sti_destroy(FrtSegmentTermIndex *sti)
|
2035
|
-
{
|
1182
|
+
static void sti_destroy(FrtSegmentTermIndex *sti) {
|
2036
1183
|
if (sti->index_terms) {
|
2037
1184
|
int i;
|
2038
1185
|
const int sti_index_cnt = sti->index_cnt;
|
@@ -2076,8 +1223,7 @@ static void sti_ensure_index_is_read(FrtSegmentTermIndex *sti, FrtTermEnum *inde
|
|
2076
1223
|
}
|
2077
1224
|
}
|
2078
1225
|
|
2079
|
-
static int sti_get_index_offset(FrtSegmentTermIndex *sti, const char *term)
|
2080
|
-
{
|
1226
|
+
static int sti_get_index_offset(FrtSegmentTermIndex *sti, const char *term) {
|
2081
1227
|
int lo = 0;
|
2082
1228
|
int hi = sti->index_cnt - 1;
|
2083
1229
|
int mid, delta;
|
@@ -2088,11 +1234,9 @@ static int sti_get_index_offset(FrtSegmentTermIndex *sti, const char *term)
|
|
2088
1234
|
delta = strcmp(term, index_terms[mid]);
|
2089
1235
|
if (delta < 0) {
|
2090
1236
|
hi = mid - 1;
|
2091
|
-
}
|
2092
|
-
else if (delta > 0) {
|
1237
|
+
} else if (delta > 0) {
|
2093
1238
|
lo = mid + 1;
|
2094
|
-
}
|
2095
|
-
else {
|
1239
|
+
} else {
|
2096
1240
|
return mid;
|
2097
1241
|
}
|
2098
1242
|
}
|
@@ -2120,7 +1264,7 @@ FrtSegmentFieldIndex *frt_sfi_open(FrtStore *store, const char *segment) {
|
|
2120
1264
|
pthread_mutex_init(&sfi->mutex, NULL);
|
2121
1265
|
|
2122
1266
|
sprintf(file_name, "%s.tfx", segment);
|
2123
|
-
is = store->open_input(store, file_name);
|
1267
|
+
is = store->open_input(store, segm_idx_name, file_name);
|
2124
1268
|
field_count = (int)frt_is_read_u32(is);
|
2125
1269
|
sfi->index_interval = frt_is_read_vint(is);
|
2126
1270
|
sfi->skip_interval = frt_is_read_vint(is);
|
@@ -2139,7 +1283,7 @@ FrtSegmentFieldIndex *frt_sfi_open(FrtStore *store, const char *segment) {
|
|
2139
1283
|
frt_is_close(is);
|
2140
1284
|
|
2141
1285
|
sprintf(file_name, "%s.tix", segment);
|
2142
|
-
is = store->open_input(store, file_name);
|
1286
|
+
is = store->open_input(store, segm_idx_name, file_name);
|
2143
1287
|
FRT_DEREF(is);
|
2144
1288
|
sfi->index_te = frt_ste_new(is, sfi);
|
2145
1289
|
return sfi;
|
@@ -2269,13 +1413,11 @@ void frt_ste_close(FrtTermEnum *te) {
|
|
2269
1413
|
free(te);
|
2270
1414
|
}
|
2271
1415
|
|
2272
|
-
static char *frt_ste_get_term(FrtTermEnum *te, int pos)
|
2273
|
-
{
|
1416
|
+
static char *frt_ste_get_term(FrtTermEnum *te, int pos) {
|
2274
1417
|
FrtSegmentTermEnum *ste = STE(te);
|
2275
1418
|
if (pos >= ste->size) {
|
2276
1419
|
return NULL;
|
2277
|
-
}
|
2278
|
-
else if (pos != ste->pos) {
|
1420
|
+
} else if (pos != ste->pos) {
|
2279
1421
|
int idx_int = ste->sfi->index_interval;
|
2280
1422
|
if ((pos < ste->pos) || pos > (1 + ste->pos / idx_int) * idx_int) {
|
2281
1423
|
FrtSegmentTermIndex *sti = (FrtSegmentTermIndex *)frt_h_get_int(ste->sfi->field_dict, te->field_num);
|
@@ -2292,8 +1434,7 @@ static char *frt_ste_get_term(FrtTermEnum *te, int pos)
|
|
2292
1434
|
return te->curr_term;
|
2293
1435
|
}
|
2294
1436
|
|
2295
|
-
FrtTermEnum *frt_ste_new(FrtInStream *is, FrtSegmentFieldIndex *sfi)
|
2296
|
-
{
|
1437
|
+
FrtTermEnum *frt_ste_new(FrtInStream *is, FrtSegmentFieldIndex *sfi) {
|
2297
1438
|
FrtSegmentTermEnum *ste = ste_allocate();
|
2298
1439
|
|
2299
1440
|
TE(ste)->field_num = -1;
|
@@ -2313,8 +1454,7 @@ FrtTermEnum *frt_ste_new(FrtInStream *is, FrtSegmentFieldIndex *sfi)
|
|
2313
1454
|
|
2314
1455
|
#define MTE(te) ((MultiTermEnum *)(te))
|
2315
1456
|
|
2316
|
-
typedef struct TermEnumWrapper
|
2317
|
-
{
|
1457
|
+
typedef struct TermEnumWrapper {
|
2318
1458
|
int index;
|
2319
1459
|
FrtTermEnum *te;
|
2320
1460
|
int *doc_map;
|
@@ -2322,8 +1462,7 @@ typedef struct TermEnumWrapper
|
|
2322
1462
|
char *term;
|
2323
1463
|
} TermEnumWrapper;
|
2324
1464
|
|
2325
|
-
typedef struct MultiTermEnum
|
2326
|
-
{
|
1465
|
+
typedef struct MultiTermEnum {
|
2327
1466
|
FrtTermEnum te;
|
2328
1467
|
int doc_freq;
|
2329
1468
|
FrtPriorityQueue *tew_queue;
|
@@ -2335,29 +1474,24 @@ typedef struct MultiTermEnum
|
|
2335
1474
|
int *ti_indexes;
|
2336
1475
|
} MultiTermEnum;
|
2337
1476
|
|
2338
|
-
static bool tew_lt(const TermEnumWrapper *tew1, const TermEnumWrapper *tew2)
|
2339
|
-
{
|
1477
|
+
static bool tew_lt(const TermEnumWrapper *tew1, const TermEnumWrapper *tew2) {
|
2340
1478
|
int cmpres = strcmp(tew1->term, tew2->term);
|
2341
1479
|
if (0 == cmpres) {
|
2342
1480
|
return tew1->index < tew2->index;
|
2343
|
-
}
|
2344
|
-
else {
|
1481
|
+
} else {
|
2345
1482
|
return cmpres < 0;
|
2346
1483
|
}
|
2347
1484
|
}
|
2348
1485
|
|
2349
|
-
static char *tew_next(TermEnumWrapper *tew)
|
2350
|
-
{
|
1486
|
+
static char *tew_next(TermEnumWrapper *tew) {
|
2351
1487
|
return (tew->term = tew->te->next(tew->te));
|
2352
1488
|
}
|
2353
1489
|
|
2354
|
-
static char *tew_skip_to(TermEnumWrapper *tew, const char *term)
|
2355
|
-
{
|
1490
|
+
static char *tew_skip_to(TermEnumWrapper *tew, const char *term) {
|
2356
1491
|
return (tew->term = tew->te->skip_to(tew->te, term));
|
2357
1492
|
}
|
2358
1493
|
|
2359
|
-
static void tew_destroy(TermEnumWrapper *tew)
|
2360
|
-
{
|
1494
|
+
static void tew_destroy(TermEnumWrapper *tew) {
|
2361
1495
|
frt_ir_close(tew->ir);
|
2362
1496
|
if (tew->doc_map) {
|
2363
1497
|
free(tew->doc_map);
|
@@ -2424,8 +1558,7 @@ static FrtTermEnum *mte_set_field(FrtTermEnum *te, int field_num) {
|
|
2424
1558
|
if (tew_next(tew)) {
|
2425
1559
|
frt_pq_push(mte->tew_queue, tew); /* initialize queue */
|
2426
1560
|
}
|
2427
|
-
}
|
2428
|
-
else {
|
1561
|
+
} else {
|
2429
1562
|
sub_te->field_num = -1;
|
2430
1563
|
}
|
2431
1564
|
|
@@ -2528,7 +1661,7 @@ FrtTermInfosReader *frt_tir_open(FrtStore *store, FrtSegmentFieldIndex *sfi, con
|
|
2528
1661
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
2529
1662
|
|
2530
1663
|
sprintf(file_name, "%s.tis", segment);
|
2531
|
-
FrtInStream *is = store->open_input(store, file_name);
|
1664
|
+
FrtInStream *is = store->open_input(store, segm_idx_name, file_name);
|
2532
1665
|
FRT_DEREF(is);
|
2533
1666
|
tir->orig_te = frt_ste_new(is, sfi);
|
2534
1667
|
tir->thread_te = 0;
|
@@ -2607,7 +1740,7 @@ void frt_tir_close(FrtTermInfosReader *tir) {
|
|
2607
1740
|
|
2608
1741
|
static FrtTermWriter *frt_tw_new(FrtStore *store, char *file_name) {
|
2609
1742
|
FrtTermWriter *tw = FRT_ALLOC_AND_ZERO(FrtTermWriter);
|
2610
|
-
tw->os = store->new_output(store, file_name);
|
1743
|
+
tw->os = store->new_output(store, segm_idx_name, file_name);
|
2611
1744
|
tw->last_term = FRT_EMPTY_STRING;
|
2612
1745
|
return tw;
|
2613
1746
|
}
|
@@ -2634,7 +1767,7 @@ FrtTermInfosWriter *frt_tiw_open(FrtStore *store, const char *segment, int index
|
|
2634
1767
|
strcpy(file_name + segment_len, ".tis");
|
2635
1768
|
tiw->tis_writer = frt_tw_new(store, file_name);
|
2636
1769
|
strcpy(file_name + segment_len, ".tfx");
|
2637
|
-
tiw->tfx_out = store->new_output(store, file_name);
|
1770
|
+
tiw->tfx_out = store->new_output(store, segm_idx_name, file_name);
|
2638
1771
|
frt_os_write_u32(tiw->tfx_out, 0); /* make space for field_count */
|
2639
1772
|
|
2640
1773
|
/* The following two numbers are the first numbers written to the field
|
@@ -3002,35 +2135,30 @@ static bool stpe_next(FrtTermDocEnum *tde) {
|
|
3002
2135
|
}
|
3003
2136
|
}
|
3004
2137
|
|
3005
|
-
static int stpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
|
3006
|
-
{
|
2138
|
+
static int stpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num) {
|
3007
2139
|
(void)tde; (void)docs; (void)freqs; (void)req_num;
|
3008
2140
|
FRT_RAISE(FRT_ARG_ERROR, "TermPosEnum does not handle processing multiple documents"
|
3009
2141
|
" in one call. Use TermDocEnum instead.");
|
3010
2142
|
return -1;
|
3011
2143
|
}
|
3012
2144
|
|
3013
|
-
static int stpe_next_position(FrtTermDocEnum *tde)
|
3014
|
-
{
|
2145
|
+
static int stpe_next_position(FrtTermDocEnum *tde) {
|
3015
2146
|
FrtSegmentTermDocEnum *stde = STDE(tde);
|
3016
2147
|
return (stde->prx_cnt-- > 0) ? stde->position += frt_is_read_vint(stde->prx_in)
|
3017
2148
|
: -1;
|
3018
2149
|
}
|
3019
2150
|
|
3020
|
-
static void stpe_close(FrtTermDocEnum *tde)
|
3021
|
-
{
|
2151
|
+
static void stpe_close(FrtTermDocEnum *tde) {
|
3022
2152
|
frt_is_close(STDE(tde)->prx_in);
|
3023
2153
|
STDE(tde)->prx_in = NULL;
|
3024
2154
|
stde_close(tde);
|
3025
2155
|
}
|
3026
2156
|
|
3027
|
-
static void stpe_skip_prox(FrtSegmentTermDocEnum *stde)
|
3028
|
-
{
|
2157
|
+
static void stpe_skip_prox(FrtSegmentTermDocEnum *stde) {
|
3029
2158
|
frt_is_skip_vints(stde->prx_in, stde->freq);
|
3030
2159
|
}
|
3031
2160
|
|
3032
|
-
static void stpe_seek_prox(FrtSegmentTermDocEnum *stde, frt_off_t prx_ptr)
|
3033
|
-
{
|
2161
|
+
static void stpe_seek_prox(FrtSegmentTermDocEnum *stde, frt_off_t prx_ptr) {
|
3034
2162
|
frt_is_seek(stde->prx_in, prx_ptr);
|
3035
2163
|
stde->prx_cnt = 0;
|
3036
2164
|
}
|
@@ -3039,8 +2167,7 @@ FrtTermDocEnum *frt_stpe_new(FrtTermInfosReader *tir,
|
|
3039
2167
|
FrtInStream *frq_in,
|
3040
2168
|
FrtInStream *prx_in,
|
3041
2169
|
FrtBitVector *del_docs,
|
3042
|
-
int skip_interval)
|
3043
|
-
{
|
2170
|
+
int skip_interval) {
|
3044
2171
|
FrtTermDocEnum *tde = frt_stde_new(tir, frq_in, del_docs, skip_interval);
|
3045
2172
|
FrtSegmentTermDocEnum *stde = STDE(tde);
|
3046
2173
|
|
@@ -3069,8 +2196,7 @@ FrtTermDocEnum *frt_stpe_new(FrtTermInfosReader *tir,
|
|
3069
2196
|
|
3070
2197
|
#define MTDE(tde) ((MultiTermDocEnum *)(tde))
|
3071
2198
|
|
3072
|
-
typedef struct MultiTermDocEnum
|
3073
|
-
{
|
2199
|
+
typedef struct MultiTermDocEnum {
|
3074
2200
|
FrtTermDocEnum tde;
|
3075
2201
|
int *starts;
|
3076
2202
|
int base;
|
@@ -3083,16 +2209,14 @@ typedef struct MultiTermDocEnum
|
|
3083
2209
|
FrtTermDocEnum *curr_tde;
|
3084
2210
|
} MultiTermDocEnum;
|
3085
2211
|
|
3086
|
-
static FrtTermDocEnum *mtde_next_tde(MultiTermDocEnum *mtde)
|
3087
|
-
{
|
2212
|
+
static FrtTermDocEnum *mtde_next_tde(MultiTermDocEnum *mtde) {
|
3088
2213
|
mtde->ptr++;
|
3089
2214
|
while (mtde->ptr < mtde->ir_cnt && !mtde->state[mtde->ptr]) {
|
3090
2215
|
mtde->ptr++;
|
3091
2216
|
}
|
3092
2217
|
if (mtde->ptr >= mtde->ir_cnt) {
|
3093
2218
|
return mtde->curr_tde = NULL;
|
3094
|
-
}
|
3095
|
-
else {
|
2219
|
+
} else {
|
3096
2220
|
FrtTermDocEnum *tde = mtde->irs_tde[mtde->ptr];
|
3097
2221
|
mtde->base = mtde->starts[mtde->ptr];
|
3098
2222
|
return mtde->curr_tde = tde;
|
@@ -3106,8 +2230,7 @@ static FrtTermDocEnum *mtde_next_tde(MultiTermDocEnum *mtde)
|
|
3106
2230
|
}\
|
3107
2231
|
} while (0)
|
3108
2232
|
|
3109
|
-
static void mtde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te)
|
3110
|
-
{
|
2233
|
+
static void mtde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te) {
|
3111
2234
|
int i;
|
3112
2235
|
MultiTermDocEnum *mtde = MTDE(tde);
|
3113
2236
|
memset(mtde->state, 0, mtde->ir_cnt);
|
@@ -3117,11 +2240,9 @@ static void mtde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te)
|
|
3117
2240
|
mtde->state[index] = 1;
|
3118
2241
|
if (tde->close == stde_close) {
|
3119
2242
|
stde_seek_ti(STDE(tde), MTE(te)->tis + i);
|
3120
|
-
}
|
3121
|
-
else if (tde->close == stpe_close) {
|
2243
|
+
} else if (tde->close == stpe_close) {
|
3122
2244
|
stpe_seek_ti(STDE(tde), MTE(te)->tis + i);
|
3123
|
-
}
|
3124
|
-
else {
|
2245
|
+
} else {
|
3125
2246
|
tde->seek(tde, MTE(te)->tews[index].te->field_num, te->curr_term);
|
3126
2247
|
}
|
3127
2248
|
}
|
@@ -3130,48 +2251,40 @@ static void mtde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te)
|
|
3130
2251
|
mtde_next_tde(mtde);
|
3131
2252
|
}
|
3132
2253
|
|
3133
|
-
static void mtde_seek(FrtTermDocEnum *tde, int field_num, const char *term)
|
3134
|
-
{
|
2254
|
+
static void mtde_seek(FrtTermDocEnum *tde, int field_num, const char *term) {
|
3135
2255
|
MultiTermDocEnum *mtde = MTDE(tde);
|
3136
2256
|
FrtTermEnum *te = mtde->te;
|
3137
2257
|
char *t;
|
3138
2258
|
te->set_field(te, field_num);
|
3139
2259
|
if (NULL != (t = te->skip_to(te, term)) && 0 == strcmp(term, t)) {
|
3140
2260
|
mtde_seek_te(tde, te);
|
3141
|
-
}
|
3142
|
-
else {
|
2261
|
+
} else {
|
3143
2262
|
memset(mtde->state, 0, mtde->ir_cnt);
|
3144
2263
|
}
|
3145
2264
|
}
|
3146
2265
|
|
3147
|
-
static int mtde_doc_num(FrtTermDocEnum *tde)
|
3148
|
-
{
|
2266
|
+
static int mtde_doc_num(FrtTermDocEnum *tde) {
|
3149
2267
|
CHECK_CURR_TDE("doc_num");
|
3150
2268
|
return MTDE(tde)->base + MTDE(tde)->curr_tde->doc_num(MTDE(tde)->curr_tde);
|
3151
2269
|
}
|
3152
2270
|
|
3153
|
-
static int mtde_freq(FrtTermDocEnum *tde)
|
3154
|
-
{
|
2271
|
+
static int mtde_freq(FrtTermDocEnum *tde) {
|
3155
2272
|
CHECK_CURR_TDE("freq");
|
3156
2273
|
return MTDE(tde)->curr_tde->freq(MTDE(tde)->curr_tde);
|
3157
2274
|
}
|
3158
2275
|
|
3159
|
-
static bool mtde_next(FrtTermDocEnum *tde)
|
3160
|
-
{
|
2276
|
+
static bool mtde_next(FrtTermDocEnum *tde) {
|
3161
2277
|
MultiTermDocEnum *mtde = MTDE(tde);
|
3162
2278
|
if (NULL != mtde->curr_tde && mtde->curr_tde->next(mtde->curr_tde)) {
|
3163
2279
|
return true;
|
3164
|
-
}
|
3165
|
-
else if (mtde_next_tde(mtde)) {
|
2280
|
+
} else if (mtde_next_tde(mtde)) {
|
3166
2281
|
return mtde_next(tde);
|
3167
|
-
}
|
3168
|
-
else {
|
2282
|
+
} else {
|
3169
2283
|
return false;
|
3170
2284
|
}
|
3171
2285
|
}
|
3172
2286
|
|
3173
|
-
static int mtde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
|
3174
|
-
{
|
2287
|
+
static int mtde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num) {
|
3175
2288
|
int i, end = 0, last_end = 0, b;
|
3176
2289
|
MultiTermDocEnum *mtde = MTDE(tde);
|
3177
2290
|
while (true) {
|
@@ -3180,24 +2293,21 @@ static int mtde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
|
|
3180
2293
|
freqs + last_end, req_num - last_end);
|
3181
2294
|
if (end == last_end) { /* none left in segment */
|
3182
2295
|
if (!mtde_next_tde(mtde)) return end;
|
3183
|
-
}
|
3184
|
-
else { /* got some */
|
2296
|
+
} else { /* got some */
|
3185
2297
|
b = mtde->base; /* adjust doc numbers */
|
3186
2298
|
for (i = last_end; i < end; i++) {
|
3187
2299
|
docs[i] += b;
|
3188
2300
|
}
|
3189
2301
|
if (end == req_num) {
|
3190
2302
|
return end;
|
3191
|
-
}
|
3192
|
-
else {
|
2303
|
+
} else {
|
3193
2304
|
last_end = end;
|
3194
2305
|
}
|
3195
2306
|
}
|
3196
2307
|
}
|
3197
2308
|
}
|
3198
2309
|
|
3199
|
-
static bool mtde_skip_to(FrtTermDocEnum *tde, int target_doc_num)
|
3200
|
-
{
|
2310
|
+
static bool mtde_skip_to(FrtTermDocEnum *tde, int target_doc_num) {
|
3201
2311
|
MultiTermDocEnum *mtde = MTDE(tde);
|
3202
2312
|
FrtTermDocEnum *curr_tde;
|
3203
2313
|
while (NULL != (curr_tde = mtde->curr_tde)) {
|
@@ -3211,8 +2321,7 @@ static bool mtde_skip_to(FrtTermDocEnum *tde, int target_doc_num)
|
|
3211
2321
|
return false;
|
3212
2322
|
}
|
3213
2323
|
|
3214
|
-
static void mtde_close(FrtTermDocEnum *tde)
|
3215
|
-
{
|
2324
|
+
static void mtde_close(FrtTermDocEnum *tde) {
|
3216
2325
|
MultiTermDocEnum *mtde = MTDE(tde);
|
3217
2326
|
FrtTermDocEnum *tmp_tde;
|
3218
2327
|
int i = mtde->ir_cnt;
|
@@ -3227,8 +2336,7 @@ static void mtde_close(FrtTermDocEnum *tde)
|
|
3227
2336
|
free(tde);
|
3228
2337
|
}
|
3229
2338
|
|
3230
|
-
static FrtTermDocEnum *mtxe_new(FrtMultiReader *mr)
|
3231
|
-
{
|
2339
|
+
static FrtTermDocEnum *mtxe_new(FrtMultiReader *mr) {
|
3232
2340
|
MultiTermDocEnum *mtde = FRT_ALLOC_AND_ZERO(MultiTermDocEnum);
|
3233
2341
|
FrtTermDocEnum *tde = TDE(mtde);
|
3234
2342
|
tde->seek = &mtde_seek;
|
@@ -3250,8 +2358,7 @@ static FrtTermDocEnum *mtxe_new(FrtMultiReader *mr)
|
|
3250
2358
|
return tde;
|
3251
2359
|
}
|
3252
2360
|
|
3253
|
-
static FrtTermDocEnum *mtde_new(FrtMultiReader *mr)
|
3254
|
-
{
|
2361
|
+
static FrtTermDocEnum *mtde_new(FrtMultiReader *mr) {
|
3255
2362
|
int i;
|
3256
2363
|
FrtTermDocEnum *tde = mtxe_new(mr);
|
3257
2364
|
tde->next_position = NULL;
|
@@ -3266,14 +2373,12 @@ static FrtTermDocEnum *mtde_new(FrtMultiReader *mr)
|
|
3266
2373
|
* MultiTermPosEnum
|
3267
2374
|
****************************************************************************/
|
3268
2375
|
|
3269
|
-
static int mtpe_next_position(FrtTermDocEnum *tde)
|
3270
|
-
{
|
2376
|
+
static int mtpe_next_position(FrtTermDocEnum *tde) {
|
3271
2377
|
CHECK_CURR_TDE("next_position");
|
3272
2378
|
return MTDE(tde)->curr_tde->next_position(MTDE(tde)->curr_tde);
|
3273
2379
|
}
|
3274
2380
|
|
3275
|
-
static FrtTermDocEnum *mtpe_new(FrtMultiReader *mr)
|
3276
|
-
{
|
2381
|
+
static FrtTermDocEnum *mtpe_new(FrtMultiReader *mr) {
|
3277
2382
|
int i;
|
3278
2383
|
FrtTermDocEnum *tde = mtxe_new(mr);
|
3279
2384
|
tde->next_position = &mtpe_next_position;
|
@@ -3293,8 +2398,7 @@ static FrtTermDocEnum *mtpe_new(FrtMultiReader *mr)
|
|
3293
2398
|
#define MTDPE(tde) ((MultipleTermDocPosEnum *)(tde))
|
3294
2399
|
#define MTDPE_POS_QUEUE_INIT_CAPA 8
|
3295
2400
|
|
3296
|
-
typedef struct
|
3297
|
-
{
|
2401
|
+
typedef struct {
|
3298
2402
|
FrtTermDocEnum tde;
|
3299
2403
|
int doc_num;
|
3300
2404
|
int freq;
|
@@ -3309,8 +2413,7 @@ static void tde_destroy(FrtTermDocEnum *tde) {
|
|
3309
2413
|
tde->close(tde);
|
3310
2414
|
}
|
3311
2415
|
|
3312
|
-
static void mtdpe_seek(FrtTermDocEnum *tde, int field_num, const char *term)
|
3313
|
-
{
|
2416
|
+
static void mtdpe_seek(FrtTermDocEnum *tde, int field_num, const char *term) {
|
3314
2417
|
(void)tde;
|
3315
2418
|
(void)field_num;
|
3316
2419
|
(void)term;
|
@@ -3318,18 +2421,15 @@ static void mtdpe_seek(FrtTermDocEnum *tde, int field_num, const char *term)
|
|
3318
2421
|
" the #seek operation");
|
3319
2422
|
}
|
3320
2423
|
|
3321
|
-
static int mtdpe_doc_num(FrtTermDocEnum *tde)
|
3322
|
-
{
|
2424
|
+
static int mtdpe_doc_num(FrtTermDocEnum *tde) {
|
3323
2425
|
return MTDPE(tde)->doc_num;
|
3324
2426
|
}
|
3325
2427
|
|
3326
|
-
static int mtdpe_freq(FrtTermDocEnum *tde)
|
3327
|
-
{
|
2428
|
+
static int mtdpe_freq(FrtTermDocEnum *tde) {
|
3328
2429
|
return MTDPE(tde)->freq;
|
3329
2430
|
}
|
3330
2431
|
|
3331
|
-
static bool mtdpe_next(FrtTermDocEnum *tde)
|
3332
|
-
{
|
2432
|
+
static bool mtdpe_next(FrtTermDocEnum *tde) {
|
3333
2433
|
FrtTermDocEnum *sub_tde;
|
3334
2434
|
int pos = 0, freq = 0;
|
3335
2435
|
int doc;
|
@@ -3358,8 +2458,7 @@ static bool mtdpe_next(FrtTermDocEnum *tde)
|
|
3358
2458
|
|
3359
2459
|
if (sub_tde->next(sub_tde)) {
|
3360
2460
|
frt_pq_down(mtdpe->pq);
|
3361
|
-
}
|
3362
|
-
else {
|
2461
|
+
} else {
|
3363
2462
|
sub_tde = (FrtTermDocEnum *)frt_pq_pop(mtdpe->pq);
|
3364
2463
|
sub_tde->close(sub_tde);
|
3365
2464
|
}
|
@@ -3375,13 +2474,11 @@ static bool mtdpe_next(FrtTermDocEnum *tde)
|
|
3375
2474
|
return true;
|
3376
2475
|
}
|
3377
2476
|
|
3378
|
-
static bool tdpe_less_than(FrtTermDocEnum *p1, FrtTermDocEnum *p2)
|
3379
|
-
{
|
2477
|
+
static bool tdpe_less_than(FrtTermDocEnum *p1, FrtTermDocEnum *p2) {
|
3380
2478
|
return p1->doc_num(p1) < p2->doc_num(p2);
|
3381
2479
|
}
|
3382
2480
|
|
3383
|
-
static bool mtdpe_skip_to(FrtTermDocEnum *tde, int target_doc_num)
|
3384
|
-
{
|
2481
|
+
static bool mtdpe_skip_to(FrtTermDocEnum *tde, int target_doc_num) {
|
3385
2482
|
FrtTermDocEnum *sub_tde;
|
3386
2483
|
FrtPriorityQueue *mtdpe_pq = MTDPE(tde)->pq;
|
3387
2484
|
|
@@ -3389,8 +2486,7 @@ static bool mtdpe_skip_to(FrtTermDocEnum *tde, int target_doc_num)
|
|
3389
2486
|
&& (target_doc_num > sub_tde->doc_num(sub_tde))) {
|
3390
2487
|
if (sub_tde->skip_to(sub_tde, target_doc_num)) {
|
3391
2488
|
frt_pq_down(mtdpe_pq);
|
3392
|
-
}
|
3393
|
-
else {
|
2489
|
+
} else {
|
3394
2490
|
sub_tde = (FrtTermDocEnum *)frt_pq_pop(mtdpe_pq);
|
3395
2491
|
sub_tde->close(sub_tde);
|
3396
2492
|
}
|
@@ -3398,8 +2494,7 @@ static bool mtdpe_skip_to(FrtTermDocEnum *tde, int target_doc_num)
|
|
3398
2494
|
return tde->next(tde);
|
3399
2495
|
}
|
3400
2496
|
|
3401
|
-
static int mtdpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
|
3402
|
-
{
|
2497
|
+
static int mtdpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num) {
|
3403
2498
|
(void)tde;
|
3404
2499
|
(void)docs;
|
3405
2500
|
(void)freqs;
|
@@ -3408,21 +2503,18 @@ static int mtdpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
|
|
3408
2503
|
return req_num;
|
3409
2504
|
}
|
3410
2505
|
|
3411
|
-
static int mtdpe_next_position(FrtTermDocEnum *tde)
|
3412
|
-
{
|
2506
|
+
static int mtdpe_next_position(FrtTermDocEnum *tde) {
|
3413
2507
|
return MTDPE(tde)->pos_queue[MTDPE(tde)->pos_queue_index++];
|
3414
2508
|
}
|
3415
2509
|
|
3416
|
-
static void mtdpe_close(FrtTermDocEnum *tde)
|
3417
|
-
{
|
2510
|
+
static void mtdpe_close(FrtTermDocEnum *tde) {
|
3418
2511
|
frt_pq_clear(MTDPE(tde)->pq);
|
3419
2512
|
frt_pq_destroy(MTDPE(tde)->pq);
|
3420
2513
|
free(MTDPE(tde)->pos_queue);
|
3421
2514
|
free(tde);
|
3422
2515
|
}
|
3423
2516
|
|
3424
|
-
FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms, int t_cnt)
|
3425
|
-
{
|
2517
|
+
FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms, int t_cnt) {
|
3426
2518
|
int i;
|
3427
2519
|
MultipleTermDocPosEnum *mtdpe = FRT_ALLOC_AND_ZERO(MultipleTermDocPosEnum);
|
3428
2520
|
FrtTermDocEnum *tde = TDE(mtdpe);
|
@@ -3437,8 +2529,7 @@ FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms, i
|
|
3437
2529
|
tpe->seek(tpe, field_num, terms[i]);
|
3438
2530
|
if (tpe->next(tpe)) {
|
3439
2531
|
frt_pq_push(pq, tpe);
|
3440
|
-
}
|
3441
|
-
else {
|
2532
|
+
} else {
|
3442
2533
|
tpe->close(tpe);
|
3443
2534
|
}
|
3444
2535
|
}
|
@@ -3471,26 +2562,22 @@ static void file_name_filter_init(void) {
|
|
3471
2562
|
frt_register_for_cleanup(fn_extensions, (frt_free_ft)&frt_h_destroy);
|
3472
2563
|
}
|
3473
2564
|
|
3474
|
-
bool frt_file_name_filter_is_index_file(const char *file_name, bool include_locks)
|
3475
|
-
{
|
2565
|
+
bool frt_file_name_filter_is_index_file(const char *file_name, bool include_locks) {
|
3476
2566
|
char *p = strrchr(file_name, '.');
|
3477
2567
|
if (NULL == fn_extensions) file_name_filter_init();
|
3478
2568
|
if (NULL != p) {
|
3479
2569
|
char *extension = p + 1;
|
3480
2570
|
if (NULL != frt_h_get(fn_extensions, extension)) {
|
3481
2571
|
return true;
|
3482
|
-
}
|
3483
|
-
else if ((*extension == 'f' || *extension == 's')
|
2572
|
+
} else if ((*extension == 'f' || *extension == 's')
|
3484
2573
|
&& *(extension + 1) >= '0'
|
3485
2574
|
&& *(extension + 1) <= '9') {
|
3486
2575
|
return true;
|
3487
|
-
}
|
3488
|
-
else if (include_locks && (strcmp(extension, "lck") == 0)
|
2576
|
+
} else if (include_locks && (strcmp(extension, "lck") == 0)
|
3489
2577
|
&& (strncmp(file_name, "ferret", 6) == 0)) {
|
3490
2578
|
return true;
|
3491
2579
|
}
|
3492
|
-
}
|
3493
|
-
else if (0 == strncmp(FRT_SEGMENTS_FILE_NAME, file_name,
|
2580
|
+
} else if (0 == strncmp(FRT_SEGMENTS_FILE_NAME, file_name,
|
3494
2581
|
sizeof(FRT_SEGMENTS_FILE_NAME) - 1)) {
|
3495
2582
|
return true;
|
3496
2583
|
}
|
@@ -3526,8 +2613,8 @@ void frt_deleter_queue_file(FrtDeleter *dlr, const char *file_name) {
|
|
3526
2613
|
void frt_deleter_delete_file(FrtDeleter *dlr, char *file_name) {
|
3527
2614
|
FrtStore *store = dlr->store;
|
3528
2615
|
FRT_TRY
|
3529
|
-
if (store->exists(store, file_name)) {
|
3530
|
-
store->remove(store, file_name);
|
2616
|
+
if (store->exists(store, segm_idx_name, file_name)) {
|
2617
|
+
store->remove(store, segm_idx_name, file_name);
|
3531
2618
|
}
|
3532
2619
|
frt_hs_del(dlr->pending, file_name);
|
3533
2620
|
FRT_XCATCHALL
|
@@ -3642,20 +2729,18 @@ void frt_deleter_find_deletable_files(FrtDeleter *dlr) {
|
|
3642
2729
|
* info: */
|
3643
2730
|
frt_sis_curr_seg_file_name(dfa.curr_seg_file_name, store);
|
3644
2731
|
|
3645
|
-
store->each(store, &frt_deleter_find_deletable_files_i, &dfa);
|
2732
|
+
store->each(store, segm_idx_name, &frt_deleter_find_deletable_files_i, &dfa);
|
3646
2733
|
frt_h_destroy(dfa.current);
|
3647
2734
|
}
|
3648
2735
|
|
3649
|
-
static void deleter_delete_deletable_files(FrtDeleter *dlr)
|
3650
|
-
{
|
2736
|
+
static void deleter_delete_deletable_files(FrtDeleter *dlr) {
|
3651
2737
|
frt_deleter_find_deletable_files(dlr);
|
3652
2738
|
deleter_commit_pending_deletions(dlr);
|
3653
2739
|
}
|
3654
2740
|
|
3655
2741
|
/*
|
3656
2742
|
TODO: currently not used. Why not?
|
3657
|
-
static void deleter_clear_pending_deletions(FrtDeleter *dlr)
|
3658
|
-
{
|
2743
|
+
static void deleter_clear_pending_deletions(FrtDeleter *dlr) {
|
3659
2744
|
frt_hs_clear(dlr->pending);
|
3660
2745
|
}
|
3661
2746
|
*/
|
@@ -3666,14 +2751,12 @@ static void deleter_clear_pending_deletions(FrtDeleter *dlr)
|
|
3666
2751
|
*
|
3667
2752
|
****************************************************************************/
|
3668
2753
|
|
3669
|
-
static void ir_acquire_not_necessary(FrtIndexReader *ir)
|
3670
|
-
{
|
2754
|
+
static void ir_acquire_not_necessary(FrtIndexReader *ir) {
|
3671
2755
|
(void)ir;
|
3672
2756
|
}
|
3673
2757
|
|
3674
2758
|
#define I64_PFX POSH_I64_PRINTF_PREFIX
|
3675
|
-
static void ir_acquire_write_lock(FrtIndexReader *ir)
|
3676
|
-
{
|
2759
|
+
static void ir_acquire_write_lock(FrtIndexReader *ir) {
|
3677
2760
|
if (ir->is_stale) {
|
3678
2761
|
FRT_RAISE(FRT_STATE_ERROR, "IndexReader out of date and no longer valid for "
|
3679
2762
|
"delete, undelete, or set_norm operations. To "
|
@@ -3682,7 +2765,7 @@ static void ir_acquire_write_lock(FrtIndexReader *ir)
|
|
3682
2765
|
}
|
3683
2766
|
|
3684
2767
|
if (NULL == ir->write_lock) {
|
3685
|
-
ir->write_lock = frt_open_lock(ir->store, FRT_WRITE_LOCK_NAME);
|
2768
|
+
ir->write_lock = frt_open_lock(ir->store, segm_idx_name, FRT_WRITE_LOCK_NAME);
|
3686
2769
|
if (!ir->write_lock->obtain(ir->write_lock)) {/* obtain write lock */
|
3687
2770
|
FRT_RAISE(FRT_LOCK_ERROR, "Could not obtain write lock when trying to "
|
3688
2771
|
"write changes to the index. Check that there "
|
@@ -3728,8 +2811,7 @@ static FrtIndexReader *ir_setup(FrtIndexReader *ir, FrtStore *store, FrtSegmentI
|
|
3728
2811
|
ir->is_owner = is_owner;
|
3729
2812
|
if (is_owner) {
|
3730
2813
|
ir->acquire_write_lock = &ir_acquire_write_lock;
|
3731
|
-
}
|
3732
|
-
else {
|
2814
|
+
} else {
|
3733
2815
|
ir->acquire_write_lock = &ir_acquire_not_necessary;
|
3734
2816
|
}
|
3735
2817
|
|
@@ -3740,8 +2822,7 @@ int frt_ir_doc_freq(FrtIndexReader *ir, ID field, const char *term) {
|
|
3740
2822
|
int field_num = frt_fis_get_field_num(ir->fis, field);
|
3741
2823
|
if (field_num >= 0) {
|
3742
2824
|
return ir->doc_freq(ir, field_num, term);
|
3743
|
-
}
|
3744
|
-
else {
|
2825
|
+
} else {
|
3745
2826
|
return 0;
|
3746
2827
|
}
|
3747
2828
|
}
|
@@ -3761,8 +2842,7 @@ void frt_ir_set_norm(FrtIndexReader *ir, int doc_num, ID field, frt_uchar val) {
|
|
3761
2842
|
}
|
3762
2843
|
}
|
3763
2844
|
|
3764
|
-
frt_uchar *frt_ir_get_norms_i(FrtIndexReader *ir, int field_num)
|
3765
|
-
{
|
2845
|
+
frt_uchar *frt_ir_get_norms_i(FrtIndexReader *ir, int field_num) {
|
3766
2846
|
frt_uchar *norms = NULL;
|
3767
2847
|
if (field_num >= 0) {
|
3768
2848
|
norms = ir->get_norms(ir, field_num);
|
@@ -3785,15 +2865,13 @@ frt_uchar *frt_ir_get_norms_into(FrtIndexReader *ir, ID field, frt_uchar *buf) {
|
|
3785
2865
|
int field_num = frt_fis_get_field_num(ir->fis, field);
|
3786
2866
|
if (field_num >= 0) {
|
3787
2867
|
ir->get_norms_into(ir, field_num, buf);
|
3788
|
-
}
|
3789
|
-
else {
|
2868
|
+
} else {
|
3790
2869
|
memset(buf, 0, ir->max_doc(ir));
|
3791
2870
|
}
|
3792
2871
|
return buf;
|
3793
2872
|
}
|
3794
2873
|
|
3795
|
-
void frt_ir_undelete_all(FrtIndexReader *ir)
|
3796
|
-
{
|
2874
|
+
void frt_ir_undelete_all(FrtIndexReader *ir) {
|
3797
2875
|
pthread_mutex_lock(&ir->mutex);
|
3798
2876
|
ir->acquire_write_lock(ir);
|
3799
2877
|
ir->undelete_all_i(ir);
|
@@ -3801,8 +2879,7 @@ void frt_ir_undelete_all(FrtIndexReader *ir)
|
|
3801
2879
|
pthread_mutex_unlock(&ir->mutex);
|
3802
2880
|
}
|
3803
2881
|
|
3804
|
-
void frt_ir_delete_doc(FrtIndexReader *ir, int doc_num)
|
3805
|
-
{
|
2882
|
+
void frt_ir_delete_doc(FrtIndexReader *ir, int doc_num) {
|
3806
2883
|
if (doc_num >= 0 && doc_num < ir->max_doc(ir)) {
|
3807
2884
|
pthread_mutex_lock(&ir->mutex);
|
3808
2885
|
ir->acquire_write_lock(ir);
|
@@ -3861,8 +2938,7 @@ FrtTermDocEnum *frt_ir_term_positions_for(FrtIndexReader *ir, ID field, const ch
|
|
3861
2938
|
return tde;
|
3862
2939
|
}
|
3863
2940
|
|
3864
|
-
static void ir_commit_i(FrtIndexReader *ir)
|
3865
|
-
{
|
2941
|
+
static void ir_commit_i(FrtIndexReader *ir) {
|
3866
2942
|
if (ir->has_changes) {
|
3867
2943
|
if (NULL == ir->deleter && NULL != ir->store) {
|
3868
2944
|
/* In the MultiReader case, we share this deleter across all
|
@@ -3888,16 +2964,14 @@ static void ir_commit_i(FrtIndexReader *ir)
|
|
3888
2964
|
frt_close_lock(ir->write_lock);
|
3889
2965
|
ir->write_lock = NULL;
|
3890
2966
|
}
|
3891
|
-
}
|
3892
|
-
else {
|
2967
|
+
} else {
|
3893
2968
|
ir->commit_i(ir);
|
3894
2969
|
}
|
3895
2970
|
}
|
3896
2971
|
ir->has_changes = false;
|
3897
2972
|
}
|
3898
2973
|
|
3899
|
-
void frt_ir_commit(FrtIndexReader *ir)
|
3900
|
-
{
|
2974
|
+
void frt_ir_commit(FrtIndexReader *ir) {
|
3901
2975
|
pthread_mutex_lock(&ir->mutex);
|
3902
2976
|
ir_commit_i(ir);
|
3903
2977
|
pthread_mutex_unlock(&ir->mutex);
|
@@ -3930,15 +3004,13 @@ void frt_ir_close(FrtIndexReader *ir) {
|
|
3930
3004
|
/**
|
3931
3005
|
* Don't call this method if the cache already exists
|
3932
3006
|
**/
|
3933
|
-
void frt_ir_add_cache(FrtIndexReader *ir)
|
3934
|
-
{
|
3007
|
+
void frt_ir_add_cache(FrtIndexReader *ir) {
|
3935
3008
|
if (NULL == ir->cache) {
|
3936
3009
|
ir->cache = frt_co_hash_create();
|
3937
3010
|
}
|
3938
3011
|
}
|
3939
3012
|
|
3940
|
-
bool frt_ir_is_latest(FrtIndexReader *ir)
|
3941
|
-
{
|
3013
|
+
bool frt_ir_is_latest(FrtIndexReader *ir) {
|
3942
3014
|
return ir->is_latest_i(ir);
|
3943
3015
|
}
|
3944
3016
|
|
@@ -3953,8 +3025,7 @@ typedef struct Norm {
|
|
3953
3025
|
bool is_dirty : 1;
|
3954
3026
|
} Norm;
|
3955
3027
|
|
3956
|
-
static Norm *norm_create(FrtInStream *is, int field_num)
|
3957
|
-
{
|
3028
|
+
static Norm *norm_create(FrtInStream *is, int field_num) {
|
3958
3029
|
Norm *norm = FRT_ALLOC(Norm);
|
3959
3030
|
|
3960
3031
|
norm->is = is;
|
@@ -3966,8 +3037,7 @@ static Norm *norm_create(FrtInStream *is, int field_num)
|
|
3966
3037
|
return norm;
|
3967
3038
|
}
|
3968
3039
|
|
3969
|
-
static void norm_destroy(Norm *norm)
|
3970
|
-
{
|
3040
|
+
static void norm_destroy(Norm *norm) {
|
3971
3041
|
frt_is_close(norm->is);
|
3972
3042
|
if (NULL != norm->bytes) {
|
3973
3043
|
free(norm->bytes);
|
@@ -3976,8 +3046,7 @@ static void norm_destroy(Norm *norm)
|
|
3976
3046
|
}
|
3977
3047
|
|
3978
3048
|
static void norm_rewrite(Norm *norm, FrtStore *store, FrtDeleter *dlr,
|
3979
|
-
FrtSegmentInfo *si, int doc_count)
|
3980
|
-
{
|
3049
|
+
FrtSegmentInfo *si, int doc_count) {
|
3981
3050
|
FrtOutStream *os;
|
3982
3051
|
char norm_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
3983
3052
|
const int field_num = norm->field_num;
|
@@ -3987,7 +3056,7 @@ static void norm_rewrite(Norm *norm, FrtStore *store, FrtDeleter *dlr,
|
|
3987
3056
|
}
|
3988
3057
|
frt_si_advance_norm_gen(si, field_num);
|
3989
3058
|
si_norm_file_name(si, norm_file_name, field_num);
|
3990
|
-
os = store->new_output(store, norm_file_name);
|
3059
|
+
os = store->new_output(store, segm_idx_name, norm_file_name);
|
3991
3060
|
frt_os_write_bytes(os, norm->bytes, doc_count);
|
3992
3061
|
frt_os_close(os);
|
3993
3062
|
norm->is_dirty = false;
|
@@ -4011,8 +3080,7 @@ static FrtFieldsReader *sr_fr(FrtSegmentReader *sr) {
|
|
4011
3080
|
return fr;
|
4012
3081
|
}
|
4013
3082
|
|
4014
|
-
static bool sr_is_deleted_i(FrtSegmentReader *sr, int doc_num)
|
4015
|
-
{
|
3083
|
+
static bool sr_is_deleted_i(FrtSegmentReader *sr, int doc_num) {
|
4016
3084
|
return (NULL != sr->deleted_docs && frt_bv_get(sr->deleted_docs, doc_num));
|
4017
3085
|
}
|
4018
3086
|
|
@@ -4080,10 +3148,9 @@ static void sr_set_deleter_i(FrtIndexReader *ir, FrtDeleter *deleter) {
|
|
4080
3148
|
ir->deleter = deleter;
|
4081
3149
|
}
|
4082
3150
|
|
4083
|
-
static void bv_write(FrtBitVector *bv, FrtStore *store, char *name)
|
4084
|
-
{
|
3151
|
+
static void bv_write(FrtBitVector *bv, FrtStore *store, char *name) {
|
4085
3152
|
int i;
|
4086
|
-
FrtOutStream *os = store->new_output(store, name);
|
3153
|
+
FrtOutStream *os = store->new_output(store, segm_idx_name, name);
|
4087
3154
|
frt_os_write_vint(os, bv->size);
|
4088
3155
|
for (i = ((bv->size-1) >> 5); i >= 0; i--) {
|
4089
3156
|
frt_os_write_u32(os, bv->bits[i]);
|
@@ -4091,11 +3158,10 @@ static void bv_write(FrtBitVector *bv, FrtStore *store, char *name)
|
|
4091
3158
|
frt_os_close(os);
|
4092
3159
|
}
|
4093
3160
|
|
4094
|
-
static FrtBitVector *bv_read(FrtStore *store, char *name)
|
4095
|
-
{
|
3161
|
+
static FrtBitVector *bv_read(FrtStore *store, char *name) {
|
4096
3162
|
int i;
|
4097
3163
|
volatile bool success = false;
|
4098
|
-
FrtInStream *volatile is = store->open_input(store, name);
|
3164
|
+
FrtInStream *volatile is = store->open_input(store, segm_idx_name, name);
|
4099
3165
|
FrtBitVector *volatile bv = FRT_ALLOC_AND_ZERO(FrtBitVector);
|
4100
3166
|
bv->size = (int)frt_is_read_vint(is);
|
4101
3167
|
bv->capa = (bv->size >> 5) + 1;
|
@@ -4114,13 +3180,11 @@ static FrtBitVector *bv_read(FrtStore *store, char *name)
|
|
4114
3180
|
return bv;
|
4115
3181
|
}
|
4116
3182
|
|
4117
|
-
static bool sr_is_latest_i(FrtIndexReader *ir)
|
4118
|
-
{
|
3183
|
+
static bool sr_is_latest_i(FrtIndexReader *ir) {
|
4119
3184
|
return (frt_sis_read_current_version(ir->store) == ir->sis->version);
|
4120
3185
|
}
|
4121
3186
|
|
4122
|
-
static void sr_commit_i(FrtIndexReader *ir)
|
4123
|
-
{
|
3187
|
+
static void sr_commit_i(FrtIndexReader *ir) {
|
4124
3188
|
FrtSegmentInfo *si = SR(ir)->si;
|
4125
3189
|
char *segment = SR(ir)->si->name;
|
4126
3190
|
char tmp_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
@@ -4133,8 +3197,7 @@ static void sr_commit_i(FrtIndexReader *ir)
|
|
4133
3197
|
if (SR(ir)->undelete_all) {
|
4134
3198
|
si->del_gen = -1;
|
4135
3199
|
SR(ir)->undelete_all = false;
|
4136
|
-
}
|
4137
|
-
else {
|
3200
|
+
} else {
|
4138
3201
|
/* (SR(ir)->deleted_docs_dirty) re-write deleted */
|
4139
3202
|
si->del_gen++;
|
4140
3203
|
frt_fn_for_generation(tmp_file_name, segment, "del", si->del_gen);
|
@@ -4177,8 +3240,7 @@ static void sr_close_i(FrtIndexReader *ir) {
|
|
4177
3240
|
}
|
4178
3241
|
}
|
4179
3242
|
|
4180
|
-
static int sr_num_docs(FrtIndexReader *ir)
|
4181
|
-
{
|
3243
|
+
static int sr_num_docs(FrtIndexReader *ir) {
|
4182
3244
|
int num_docs;
|
4183
3245
|
|
4184
3246
|
pthread_mutex_lock(&ir->mutex);
|
@@ -4190,13 +3252,11 @@ static int sr_num_docs(FrtIndexReader *ir)
|
|
4190
3252
|
return num_docs;
|
4191
3253
|
}
|
4192
3254
|
|
4193
|
-
static int sr_max_doc(FrtIndexReader *ir)
|
4194
|
-
{
|
3255
|
+
static int sr_max_doc(FrtIndexReader *ir) {
|
4195
3256
|
return SR(ir)->fr->size;
|
4196
3257
|
}
|
4197
3258
|
|
4198
|
-
static FrtDocument *sr_get_doc(FrtIndexReader *ir, int doc_num)
|
4199
|
-
{
|
3259
|
+
static FrtDocument *sr_get_doc(FrtIndexReader *ir, int doc_num) {
|
4200
3260
|
FrtDocument *doc;
|
4201
3261
|
pthread_mutex_lock(&ir->mutex);
|
4202
3262
|
if (sr_is_deleted_i(SR(ir), doc_num)) {
|
@@ -4208,8 +3268,7 @@ static FrtDocument *sr_get_doc(FrtIndexReader *ir, int doc_num)
|
|
4208
3268
|
return doc;
|
4209
3269
|
}
|
4210
3270
|
|
4211
|
-
static FrtLazyDoc *sr_get_lazy_doc(FrtIndexReader *ir, int doc_num)
|
4212
|
-
{
|
3271
|
+
static FrtLazyDoc *sr_get_lazy_doc(FrtIndexReader *ir, int doc_num) {
|
4213
3272
|
FrtLazyDoc *lazy_doc;
|
4214
3273
|
pthread_mutex_lock(&ir->mutex);
|
4215
3274
|
if (sr_is_deleted_i(SR(ir), doc_num)) {
|
@@ -4221,8 +3280,7 @@ static FrtLazyDoc *sr_get_lazy_doc(FrtIndexReader *ir, int doc_num)
|
|
4221
3280
|
return lazy_doc;
|
4222
3281
|
}
|
4223
3282
|
|
4224
|
-
static frt_uchar *sr_get_norms(FrtIndexReader *ir, int field_num)
|
4225
|
-
{
|
3283
|
+
static frt_uchar *sr_get_norms(FrtIndexReader *ir, int field_num) {
|
4226
3284
|
frt_uchar *norms;
|
4227
3285
|
pthread_mutex_lock(&ir->mutex);
|
4228
3286
|
norms = sr_get_norms_i(SR(ir), field_num);
|
@@ -4231,23 +3289,20 @@ static frt_uchar *sr_get_norms(FrtIndexReader *ir, int field_num)
|
|
4231
3289
|
}
|
4232
3290
|
|
4233
3291
|
static frt_uchar *sr_get_norms_into(FrtIndexReader *ir, int field_num,
|
4234
|
-
frt_uchar *buf)
|
4235
|
-
{
|
3292
|
+
frt_uchar *buf) {
|
4236
3293
|
pthread_mutex_lock(&ir->mutex);
|
4237
3294
|
sr_get_norms_into_i(SR(ir), field_num, buf);
|
4238
3295
|
pthread_mutex_unlock(&ir->mutex);
|
4239
3296
|
return buf;
|
4240
3297
|
}
|
4241
3298
|
|
4242
|
-
static FrtTermEnum *sr_terms(FrtIndexReader *ir, int field_num)
|
4243
|
-
{
|
3299
|
+
static FrtTermEnum *sr_terms(FrtIndexReader *ir, int field_num) {
|
4244
3300
|
FrtTermEnum *te = SR(ir)->tir->orig_te;
|
4245
3301
|
te = frt_ste_clone(te);
|
4246
3302
|
return ste_set_field(te, field_num);
|
4247
3303
|
}
|
4248
3304
|
|
4249
|
-
static FrtTermEnum *sr_terms_from(FrtIndexReader *ir, int field_num, const char *term)
|
4250
|
-
{
|
3305
|
+
static FrtTermEnum *sr_terms_from(FrtIndexReader *ir, int field_num, const char *term) {
|
4251
3306
|
FrtTermEnum *te = SR(ir)->tir->orig_te;
|
4252
3307
|
te = frt_ste_clone(te);
|
4253
3308
|
ste_set_field(te, field_num);
|
@@ -4255,20 +3310,17 @@ static FrtTermEnum *sr_terms_from(FrtIndexReader *ir, int field_num, const char
|
|
4255
3310
|
return te;
|
4256
3311
|
}
|
4257
3312
|
|
4258
|
-
static int sr_doc_freq(FrtIndexReader *ir, int field_num, const char *term)
|
4259
|
-
{
|
3313
|
+
static int sr_doc_freq(FrtIndexReader *ir, int field_num, const char *term) {
|
4260
3314
|
FrtTermInfo *ti = frt_tir_get_ti(frt_tir_set_field(SR(ir)->tir, field_num), term);
|
4261
3315
|
return ti ? ti->doc_freq : 0;
|
4262
3316
|
}
|
4263
3317
|
|
4264
|
-
static FrtTermDocEnum *sr_term_docs(FrtIndexReader *ir)
|
4265
|
-
{
|
3318
|
+
static FrtTermDocEnum *sr_term_docs(FrtIndexReader *ir) {
|
4266
3319
|
return frt_stde_new(SR(ir)->tir, SR(ir)->frq_in, SR(ir)->deleted_docs,
|
4267
3320
|
STE(SR(ir)->tir->orig_te)->skip_interval);
|
4268
3321
|
}
|
4269
3322
|
|
4270
|
-
static FrtTermDocEnum *sr_term_positions(FrtIndexReader *ir)
|
4271
|
-
{
|
3323
|
+
static FrtTermDocEnum *sr_term_positions(FrtIndexReader *ir) {
|
4272
3324
|
FrtSegmentReader *sr = SR(ir);
|
4273
3325
|
return frt_stpe_new(sr->tir, sr->frq_in, sr->prx_in, sr->deleted_docs,
|
4274
3326
|
STE(sr->tir->orig_te)->skip_interval);
|
@@ -4285,8 +3337,7 @@ static FrtTermVector *sr_term_vector(FrtIndexReader *ir, int doc_num, ID field)
|
|
4285
3337
|
return frt_fr_get_field_tv(fr, doc_num, fi->number);
|
4286
3338
|
}
|
4287
3339
|
|
4288
|
-
static FrtHash *sr_term_vectors(FrtIndexReader *ir, int doc_num)
|
4289
|
-
{
|
3340
|
+
static FrtHash *sr_term_vectors(FrtIndexReader *ir, int doc_num) {
|
4290
3341
|
FrtFieldsReader *fr;
|
4291
3342
|
if (!SR(ir)->fr || NULL == (fr = sr_fr(SR(ir)))) {
|
4292
3343
|
return NULL;
|
@@ -4295,8 +3346,7 @@ static FrtHash *sr_term_vectors(FrtIndexReader *ir, int doc_num)
|
|
4295
3346
|
return frt_fr_get_tv(fr, doc_num);
|
4296
3347
|
}
|
4297
3348
|
|
4298
|
-
static bool sr_is_deleted(FrtIndexReader *ir, int doc_num)
|
4299
|
-
{
|
3349
|
+
static bool sr_is_deleted(FrtIndexReader *ir, int doc_num) {
|
4300
3350
|
bool is_del;
|
4301
3351
|
|
4302
3352
|
pthread_mutex_lock(&ir->mutex);
|
@@ -4306,13 +3356,11 @@ static bool sr_is_deleted(FrtIndexReader *ir, int doc_num)
|
|
4306
3356
|
return is_del;
|
4307
3357
|
}
|
4308
3358
|
|
4309
|
-
static bool sr_has_deletions(FrtIndexReader *ir)
|
4310
|
-
{
|
3359
|
+
static bool sr_has_deletions(FrtIndexReader *ir) {
|
4311
3360
|
return NULL != SR(ir)->deleted_docs;
|
4312
3361
|
}
|
4313
3362
|
|
4314
|
-
static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store)
|
4315
|
-
{
|
3363
|
+
static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store) {
|
4316
3364
|
int i;
|
4317
3365
|
FrtSegmentInfo *si = SR(ir)->si;
|
4318
3366
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
@@ -4320,7 +3368,7 @@ static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store)
|
|
4320
3368
|
for (i = si->norm_gens_size - 1; i >= 0; i--) {
|
4321
3369
|
FrtStore *store = ir->store;
|
4322
3370
|
if (si_norm_file_name(si, file_name, i)) {
|
4323
|
-
FrtInStream *is = store->open_input(store, file_name);
|
3371
|
+
FrtInStream *is = store->open_input(store, segm_idx_name, file_name);
|
4324
3372
|
FRT_DEREF(is);
|
4325
3373
|
frt_h_set_int(SR(ir)->norms, i, norm_create(is, i));
|
4326
3374
|
}
|
@@ -4328,8 +3376,7 @@ static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store)
|
|
4328
3376
|
SR(ir)->norms_dirty = false;
|
4329
3377
|
}
|
4330
3378
|
|
4331
|
-
static FrtIndexReader *sr_setup_i(FrtSegmentReader *sr)
|
4332
|
-
{
|
3379
|
+
static FrtIndexReader *sr_setup_i(FrtSegmentReader *sr) {
|
4333
3380
|
FrtStore *volatile store = sr->si->store;
|
4334
3381
|
FrtIndexReader *ir = IR(sr);
|
4335
3382
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
@@ -4378,12 +3425,12 @@ static FrtIndexReader *sr_setup_i(FrtSegmentReader *sr)
|
|
4378
3425
|
}
|
4379
3426
|
|
4380
3427
|
sprintf(file_name, "%s.frq", sr_segment);
|
4381
|
-
sr->frq_in = store->open_input(store, file_name);
|
3428
|
+
sr->frq_in = store->open_input(store, segm_idx_name, file_name);
|
4382
3429
|
sprintf(file_name, "%s.prx", sr_segment);
|
4383
|
-
sr->prx_in = store->open_input(store, file_name);
|
3430
|
+
sr->prx_in = store->open_input(store, segm_idx_name, file_name);
|
4384
3431
|
sr->norms = frt_h_new_int((frt_free_ft)&norm_destroy);
|
4385
3432
|
sr_open_norms(ir, store);
|
4386
|
-
if (
|
3433
|
+
if (frt_fis_has_vectors(ir->fis)) {
|
4387
3434
|
frb_thread_key_create(&sr->thread_fr, NULL);
|
4388
3435
|
sr->fr_bucket = frt_ary_new();
|
4389
3436
|
}
|
@@ -4413,8 +3460,7 @@ static FrtIndexReader *sr_open(FrtSegmentInfos *sis, FrtFieldInfos *fis, int si_
|
|
4413
3460
|
|
4414
3461
|
#define MR(ir) ((FrtMultiReader *)(ir))
|
4415
3462
|
|
4416
|
-
static int mr_reader_index_i(FrtMultiReader *mr, int doc_num)
|
4417
|
-
{
|
3463
|
+
static int mr_reader_index_i(FrtMultiReader *mr, int doc_num) {
|
4418
3464
|
int lo = 0; /* search @starts array */
|
4419
3465
|
int hi = mr->r_cnt - 1; /* for first element less */
|
4420
3466
|
int mid;
|
@@ -4425,11 +3471,9 @@ static int mr_reader_index_i(FrtMultiReader *mr, int doc_num)
|
|
4425
3471
|
mid_value = mr->starts[mid];
|
4426
3472
|
if (doc_num < mid_value) {
|
4427
3473
|
hi = mid - 1;
|
4428
|
-
}
|
4429
|
-
else if (doc_num > mid_value) {
|
3474
|
+
} else if (doc_num > mid_value) {
|
4430
3475
|
lo = mid + 1;
|
4431
|
-
}
|
4432
|
-
else { /* found a match */
|
3476
|
+
} else { /* found a match */
|
4433
3477
|
while ((mid+1 < mr->r_cnt) && (mr->starts[mid+1] == mid_value)) {
|
4434
3478
|
mid += 1; /* scan to last match in case we have empty segments */
|
4435
3479
|
}
|
@@ -4439,8 +3483,7 @@ static int mr_reader_index_i(FrtMultiReader *mr, int doc_num)
|
|
4439
3483
|
return hi;
|
4440
3484
|
}
|
4441
3485
|
|
4442
|
-
static int mr_num_docs(FrtIndexReader *ir)
|
4443
|
-
{
|
3486
|
+
static int mr_num_docs(FrtIndexReader *ir) {
|
4444
3487
|
int i, num_docs;
|
4445
3488
|
pthread_mutex_lock(&ir->mutex);
|
4446
3489
|
if (MR(ir)->num_docs_cache == -1) {
|
@@ -4457,8 +3500,7 @@ static int mr_num_docs(FrtIndexReader *ir)
|
|
4457
3500
|
return num_docs;
|
4458
3501
|
}
|
4459
3502
|
|
4460
|
-
static int mr_max_doc(FrtIndexReader *ir)
|
4461
|
-
{
|
3503
|
+
static int mr_max_doc(FrtIndexReader *ir) {
|
4462
3504
|
return MR(ir)->max_doc;
|
4463
3505
|
}
|
4464
3506
|
|
@@ -4466,30 +3508,25 @@ static int mr_max_doc(FrtIndexReader *ir)
|
|
4466
3508
|
int i = mr_reader_index_i(MR(ir), doc_num);\
|
4467
3509
|
FrtIndexReader *reader = MR(ir)->sub_readers[i]
|
4468
3510
|
|
4469
|
-
static FrtDocument *mr_get_doc(FrtIndexReader *ir, int doc_num)
|
4470
|
-
{
|
3511
|
+
static FrtDocument *mr_get_doc(FrtIndexReader *ir, int doc_num) {
|
4471
3512
|
GET_READER();
|
4472
3513
|
return reader->get_doc(reader, doc_num - MR(ir)->starts[i]);
|
4473
3514
|
}
|
4474
3515
|
|
4475
|
-
static FrtLazyDoc *mr_get_lazy_doc(FrtIndexReader *ir, int doc_num)
|
4476
|
-
{
|
3516
|
+
static FrtLazyDoc *mr_get_lazy_doc(FrtIndexReader *ir, int doc_num) {
|
4477
3517
|
GET_READER();
|
4478
3518
|
return reader->get_lazy_doc(reader, doc_num - MR(ir)->starts[i]);
|
4479
3519
|
}
|
4480
3520
|
|
4481
|
-
int frt_mr_get_field_num(FrtMultiReader *mr, int ir_num, int f_num)
|
4482
|
-
{
|
3521
|
+
int frt_mr_get_field_num(FrtMultiReader *mr, int ir_num, int f_num) {
|
4483
3522
|
if (mr->field_num_map) {
|
4484
3523
|
return mr->field_num_map[ir_num][f_num];
|
4485
|
-
}
|
4486
|
-
else {
|
3524
|
+
} else {
|
4487
3525
|
return f_num;
|
4488
3526
|
}
|
4489
3527
|
}
|
4490
3528
|
|
4491
|
-
static frt_uchar *mr_get_norms(FrtIndexReader *ir, int field_num)
|
4492
|
-
{
|
3529
|
+
static frt_uchar *mr_get_norms(FrtIndexReader *ir, int field_num) {
|
4493
3530
|
frt_uchar *bytes;
|
4494
3531
|
|
4495
3532
|
pthread_mutex_lock(&ir->mutex);
|
@@ -4514,16 +3551,14 @@ static frt_uchar *mr_get_norms(FrtIndexReader *ir, int field_num)
|
|
4514
3551
|
return bytes;
|
4515
3552
|
}
|
4516
3553
|
|
4517
|
-
static frt_uchar *mr_get_norms_into(FrtIndexReader *ir, int field_num, frt_uchar *buf)
|
4518
|
-
{
|
3554
|
+
static frt_uchar *mr_get_norms_into(FrtIndexReader *ir, int field_num, frt_uchar *buf) {
|
4519
3555
|
frt_uchar *bytes;
|
4520
3556
|
|
4521
3557
|
pthread_mutex_lock(&ir->mutex);
|
4522
3558
|
bytes = (frt_uchar *)frt_h_get_int(MR(ir)->norms_cache, field_num);
|
4523
3559
|
if (NULL != bytes) {
|
4524
3560
|
memcpy(buf, bytes, MR(ir)->max_doc);
|
4525
|
-
}
|
4526
|
-
else {
|
3561
|
+
} else {
|
4527
3562
|
int i;
|
4528
3563
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
4529
3564
|
for (i = 0; i < mr_reader_cnt; i++) {
|
@@ -4538,18 +3573,15 @@ static frt_uchar *mr_get_norms_into(FrtIndexReader *ir, int field_num, frt_uchar
|
|
4538
3573
|
return buf;
|
4539
3574
|
}
|
4540
3575
|
|
4541
|
-
static FrtTermEnum *mr_terms(FrtIndexReader *ir, int field_num)
|
4542
|
-
{
|
3576
|
+
static FrtTermEnum *mr_terms(FrtIndexReader *ir, int field_num) {
|
4543
3577
|
return frt_mte_new(MR(ir), field_num, NULL);
|
4544
3578
|
}
|
4545
3579
|
|
4546
|
-
static FrtTermEnum *mr_terms_from(FrtIndexReader *ir, int field_num, const char *term)
|
4547
|
-
{
|
3580
|
+
static FrtTermEnum *mr_terms_from(FrtIndexReader *ir, int field_num, const char *term) {
|
4548
3581
|
return frt_mte_new(MR(ir), field_num, term);
|
4549
3582
|
}
|
4550
3583
|
|
4551
|
-
static int mr_doc_freq(FrtIndexReader *ir, int field_num, const char *t)
|
4552
|
-
{
|
3584
|
+
static int mr_doc_freq(FrtIndexReader *ir, int field_num, const char *t) {
|
4553
3585
|
int total = 0; /* sum freqs in segments */
|
4554
3586
|
int i = MR(ir)->r_cnt;
|
4555
3587
|
for (i = MR(ir)->r_cnt - 1; i >= 0; i--) {
|
@@ -4562,13 +3594,11 @@ static int mr_doc_freq(FrtIndexReader *ir, int field_num, const char *t)
|
|
4562
3594
|
return total;
|
4563
3595
|
}
|
4564
3596
|
|
4565
|
-
static FrtTermDocEnum *mr_term_docs(FrtIndexReader *ir)
|
4566
|
-
{
|
3597
|
+
static FrtTermDocEnum *mr_term_docs(FrtIndexReader *ir) {
|
4567
3598
|
return mtde_new(MR(ir));
|
4568
3599
|
}
|
4569
3600
|
|
4570
|
-
static FrtTermDocEnum *mr_term_positions(FrtIndexReader *ir)
|
4571
|
-
{
|
3601
|
+
static FrtTermDocEnum *mr_term_positions(FrtIndexReader *ir) {
|
4572
3602
|
return mtpe_new(MR(ir));
|
4573
3603
|
}
|
4574
3604
|
|
@@ -4577,25 +3607,21 @@ static FrtTermVector *mr_term_vector(FrtIndexReader *ir, int doc_num, ID field)
|
|
4577
3607
|
return reader->term_vector(reader, doc_num - MR(ir)->starts[i], field);
|
4578
3608
|
}
|
4579
3609
|
|
4580
|
-
static FrtHash *mr_term_vectors(FrtIndexReader *ir, int doc_num)
|
4581
|
-
{
|
3610
|
+
static FrtHash *mr_term_vectors(FrtIndexReader *ir, int doc_num) {
|
4582
3611
|
GET_READER();
|
4583
3612
|
return reader->term_vectors(reader, doc_num - MR(ir)->starts[i]);
|
4584
3613
|
}
|
4585
3614
|
|
4586
|
-
static bool mr_is_deleted(FrtIndexReader *ir, int doc_num)
|
4587
|
-
{
|
3615
|
+
static bool mr_is_deleted(FrtIndexReader *ir, int doc_num) {
|
4588
3616
|
GET_READER();
|
4589
3617
|
return reader->is_deleted(reader, doc_num - MR(ir)->starts[i]);
|
4590
3618
|
}
|
4591
3619
|
|
4592
|
-
static bool mr_has_deletions(FrtIndexReader *ir)
|
4593
|
-
{
|
3620
|
+
static bool mr_has_deletions(FrtIndexReader *ir) {
|
4594
3621
|
return MR(ir)->has_deletions;
|
4595
3622
|
}
|
4596
3623
|
|
4597
|
-
static void mr_set_norm_i(FrtIndexReader *ir, int doc_num, int field_num, frt_uchar val)
|
4598
|
-
{
|
3624
|
+
static void mr_set_norm_i(FrtIndexReader *ir, int doc_num, int field_num, frt_uchar val) {
|
4599
3625
|
int i = mr_reader_index_i(MR(ir), doc_num);
|
4600
3626
|
int fnum = frt_mr_get_field_num(MR(ir), i, field_num);
|
4601
3627
|
if (fnum >= 0) {
|
@@ -4606,8 +3632,7 @@ static void mr_set_norm_i(FrtIndexReader *ir, int doc_num, int field_num, frt_uc
|
|
4606
3632
|
}
|
4607
3633
|
}
|
4608
3634
|
|
4609
|
-
static void mr_delete_doc_i(FrtIndexReader *ir, int doc_num)
|
4610
|
-
{
|
3635
|
+
static void mr_delete_doc_i(FrtIndexReader *ir, int doc_num) {
|
4611
3636
|
GET_READER();
|
4612
3637
|
MR(ir)->num_docs_cache = -1; /* invalidate cache */
|
4613
3638
|
|
@@ -4617,8 +3642,7 @@ static void mr_delete_doc_i(FrtIndexReader *ir, int doc_num)
|
|
4617
3642
|
ir->has_changes = true;
|
4618
3643
|
}
|
4619
3644
|
|
4620
|
-
static void mr_undelete_all_i(FrtIndexReader *ir)
|
4621
|
-
{
|
3645
|
+
static void mr_undelete_all_i(FrtIndexReader *ir) {
|
4622
3646
|
int i;
|
4623
3647
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
4624
3648
|
|
@@ -4631,8 +3655,7 @@ static void mr_undelete_all_i(FrtIndexReader *ir)
|
|
4631
3655
|
ir->has_changes = true;
|
4632
3656
|
}
|
4633
3657
|
|
4634
|
-
static void mr_set_deleter_i(FrtIndexReader *ir, FrtDeleter *deleter)
|
4635
|
-
{
|
3658
|
+
static void mr_set_deleter_i(FrtIndexReader *ir, FrtDeleter *deleter) {
|
4636
3659
|
int i;
|
4637
3660
|
ir->deleter = deleter;
|
4638
3661
|
for (i = MR(ir)->r_cnt - 1; i >= 0; i--) {
|
@@ -4641,8 +3664,7 @@ static void mr_set_deleter_i(FrtIndexReader *ir, FrtDeleter *deleter)
|
|
4641
3664
|
}
|
4642
3665
|
}
|
4643
3666
|
|
4644
|
-
static bool mr_is_latest_i(FrtIndexReader *ir)
|
4645
|
-
{
|
3667
|
+
static bool mr_is_latest_i(FrtIndexReader *ir) {
|
4646
3668
|
int i;
|
4647
3669
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
4648
3670
|
for (i = 0; i < mr_reader_cnt; i++) {
|
@@ -4653,8 +3675,7 @@ static bool mr_is_latest_i(FrtIndexReader *ir)
|
|
4653
3675
|
return true;
|
4654
3676
|
}
|
4655
3677
|
|
4656
|
-
static void mr_commit_i(FrtIndexReader *ir)
|
4657
|
-
{
|
3678
|
+
static void mr_commit_i(FrtIndexReader *ir) {
|
4658
3679
|
int i;
|
4659
3680
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
4660
3681
|
for (i = 0; i < mr_reader_cnt; i++) {
|
@@ -4663,8 +3684,7 @@ static void mr_commit_i(FrtIndexReader *ir)
|
|
4663
3684
|
}
|
4664
3685
|
}
|
4665
3686
|
|
4666
|
-
static void mr_close_i(FrtIndexReader *ir)
|
4667
|
-
{
|
3687
|
+
static void mr_close_i(FrtIndexReader *ir) {
|
4668
3688
|
int i;
|
4669
3689
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
4670
3690
|
for (i = 0; i < mr_reader_cnt; i++) {
|
@@ -4870,8 +3890,7 @@ FrtIndexReader *frt_ir_open(FrtIndexReader *ir, FrtStore *store) {
|
|
4870
3890
|
*
|
4871
3891
|
****************************************************************************/
|
4872
3892
|
|
4873
|
-
static FrtOccurence *occ_new(FrtMemoryPool *mp, int pos)
|
4874
|
-
{
|
3893
|
+
static FrtOccurence *occ_new(FrtMemoryPool *mp, int pos) {
|
4875
3894
|
FrtOccurence *occ = FRT_MP_ALLOC(mp, FrtOccurence);
|
4876
3895
|
occ->pos = pos;
|
4877
3896
|
occ->next = NULL;
|
@@ -4884,8 +3903,7 @@ static FrtOccurence *occ_new(FrtMemoryPool *mp, int pos)
|
|
4884
3903
|
*
|
4885
3904
|
****************************************************************************/
|
4886
3905
|
|
4887
|
-
FrtPosting *frt_p_new(FrtMemoryPool *mp, int doc_num, int pos)
|
4888
|
-
{
|
3906
|
+
FrtPosting *frt_p_new(FrtMemoryPool *mp, int doc_num, int pos) {
|
4889
3907
|
FrtPosting *p = FRT_MP_ALLOC(mp, FrtPosting);
|
4890
3908
|
p->doc_num = doc_num;
|
4891
3909
|
p->first_occ = occ_new(mp, pos);
|
@@ -4901,8 +3919,7 @@ FrtPosting *frt_p_new(FrtMemoryPool *mp, int doc_num, int pos)
|
|
4901
3919
|
****************************************************************************/
|
4902
3920
|
|
4903
3921
|
FrtPostingList *frt_pl_new(FrtMemoryPool *mp, const char *term,
|
4904
|
-
int term_len, FrtPosting *p)
|
4905
|
-
{
|
3922
|
+
int term_len, FrtPosting *p) {
|
4906
3923
|
// TODO account for term_len as measured in the original text vs utf8 term_len of term
|
4907
3924
|
FrtPostingList *pl = FRT_MP_ALLOC(mp, FrtPostingList);
|
4908
3925
|
pl->term = (char *)frt_mp_memdup(mp, term, term_len + 1);
|
@@ -4912,20 +3929,17 @@ FrtPostingList *frt_pl_new(FrtMemoryPool *mp, const char *term,
|
|
4912
3929
|
return pl;
|
4913
3930
|
}
|
4914
3931
|
|
4915
|
-
void frt_pl_add_occ(FrtMemoryPool *mp, FrtPostingList *pl, int pos)
|
4916
|
-
{
|
3932
|
+
void frt_pl_add_occ(FrtMemoryPool *mp, FrtPostingList *pl, int pos) {
|
4917
3933
|
pl->last_occ = pl->last_occ->next = occ_new(mp, pos);
|
4918
3934
|
pl->last->freq++;
|
4919
3935
|
}
|
4920
3936
|
|
4921
|
-
static void pl_add_posting(FrtPostingList *pl, FrtPosting *p)
|
4922
|
-
{
|
3937
|
+
static void pl_add_posting(FrtPostingList *pl, FrtPosting *p) {
|
4923
3938
|
pl->last = pl->last->next = p;
|
4924
3939
|
pl->last_occ = p->first_occ;
|
4925
3940
|
}
|
4926
3941
|
|
4927
|
-
int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2)
|
4928
|
-
{
|
3942
|
+
int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2) {
|
4929
3943
|
return strcmp((*pl1)->term, (*pl2)->term);
|
4930
3944
|
}
|
4931
3945
|
|
@@ -4935,8 +3949,7 @@ int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2)
|
|
4935
3949
|
*
|
4936
3950
|
****************************************************************************/
|
4937
3951
|
|
4938
|
-
static FrtFieldInverter *fld_inv_new(FrtDocWriter *dw, FrtFieldInfo *fi)
|
4939
|
-
{
|
3952
|
+
static FrtFieldInverter *fld_inv_new(FrtDocWriter *dw, FrtFieldInfo *fi) {
|
4940
3953
|
FrtFieldInverter *fld_inv = FRT_MP_ALLOC(dw->mp, FrtFieldInverter);
|
4941
3954
|
fld_inv->is_tokenized = bits_is_tokenized(fi->bits);
|
4942
3955
|
fld_inv->store_term_vector = bits_store_term_vector(fi->bits);
|
@@ -4953,8 +3966,7 @@ static FrtFieldInverter *fld_inv_new(FrtDocWriter *dw, FrtFieldInfo *fi)
|
|
4953
3966
|
return fld_inv;
|
4954
3967
|
}
|
4955
3968
|
|
4956
|
-
static void fld_inv_destroy(FrtFieldInverter *fld_inv)
|
4957
|
-
{
|
3969
|
+
static void fld_inv_destroy(FrtFieldInverter *fld_inv) {
|
4958
3970
|
frt_h_destroy(fld_inv->plists);
|
4959
3971
|
}
|
4960
3972
|
|
@@ -4964,8 +3976,7 @@ static void fld_inv_destroy(FrtFieldInverter *fld_inv)
|
|
4964
3976
|
*
|
4965
3977
|
****************************************************************************/
|
4966
3978
|
|
4967
|
-
typedef struct SkipBuffer
|
4968
|
-
{
|
3979
|
+
typedef struct SkipBuffer {
|
4969
3980
|
FrtOutStream *buf;
|
4970
3981
|
FrtOutStream *frq_out;
|
4971
3982
|
FrtOutStream *prx_out;
|
@@ -4974,16 +3985,14 @@ typedef struct SkipBuffer
|
|
4974
3985
|
frt_off_t last_prx_ptr;
|
4975
3986
|
} SkipBuffer;
|
4976
3987
|
|
4977
|
-
static void skip_buf_reset(SkipBuffer *skip_buf)
|
4978
|
-
{
|
3988
|
+
static void skip_buf_reset(SkipBuffer *skip_buf) {
|
4979
3989
|
frt_ramo_reset(skip_buf->buf);
|
4980
3990
|
skip_buf->last_doc = 0;
|
4981
3991
|
skip_buf->last_frq_ptr = frt_os_pos(skip_buf->frq_out);
|
4982
3992
|
skip_buf->last_prx_ptr = frt_os_pos(skip_buf->prx_out);
|
4983
3993
|
}
|
4984
3994
|
|
4985
|
-
static SkipBuffer *skip_buf_new(FrtOutStream *frq_out, FrtOutStream *prx_out)
|
4986
|
-
{
|
3995
|
+
static SkipBuffer *skip_buf_new(FrtOutStream *frq_out, FrtOutStream *prx_out) {
|
4987
3996
|
SkipBuffer *skip_buf = FRT_ALLOC(SkipBuffer);
|
4988
3997
|
skip_buf->buf = frt_ram_new_buffer();
|
4989
3998
|
skip_buf->frq_out = frq_out;
|
@@ -4991,8 +4000,7 @@ static SkipBuffer *skip_buf_new(FrtOutStream *frq_out, FrtOutStream *prx_out)
|
|
4991
4000
|
return skip_buf;
|
4992
4001
|
}
|
4993
4002
|
|
4994
|
-
static void skip_buf_add(SkipBuffer *skip_buf, int doc)
|
4995
|
-
{
|
4003
|
+
static void skip_buf_add(SkipBuffer *skip_buf, int doc) {
|
4996
4004
|
frt_off_t frq_ptr = frt_os_pos(skip_buf->frq_out);
|
4997
4005
|
frt_off_t prx_ptr = frt_os_pos(skip_buf->prx_out);
|
4998
4006
|
|
@@ -5005,15 +4013,13 @@ static void skip_buf_add(SkipBuffer *skip_buf, int doc)
|
|
5005
4013
|
skip_buf->last_prx_ptr = prx_ptr;
|
5006
4014
|
}
|
5007
4015
|
|
5008
|
-
static frt_off_t skip_buf_write(SkipBuffer *skip_buf)
|
5009
|
-
{
|
4016
|
+
static frt_off_t skip_buf_write(SkipBuffer *skip_buf) {
|
5010
4017
|
frt_off_t skip_ptr = frt_os_pos(skip_buf->frq_out);
|
5011
4018
|
frt_ramo_write_to(skip_buf->buf, skip_buf->frq_out);
|
5012
4019
|
return skip_ptr;
|
5013
4020
|
}
|
5014
4021
|
|
5015
|
-
static void skip_buf_destroy(SkipBuffer *skip_buf)
|
5016
|
-
{
|
4022
|
+
static void skip_buf_destroy(SkipBuffer *skip_buf) {
|
5017
4023
|
frt_ram_destroy_buffer(skip_buf->buf);
|
5018
4024
|
free(skip_buf);
|
5019
4025
|
}
|
@@ -5024,21 +4030,19 @@ static void skip_buf_destroy(SkipBuffer *skip_buf)
|
|
5024
4030
|
*
|
5025
4031
|
****************************************************************************/
|
5026
4032
|
|
5027
|
-
static void dw_write_norms(FrtDocWriter *dw, FrtFieldInverter *fld_inv)
|
5028
|
-
{
|
4033
|
+
static void dw_write_norms(FrtDocWriter *dw, FrtFieldInverter *fld_inv) {
|
5029
4034
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
5030
4035
|
FrtOutStream *norms_out;
|
5031
4036
|
frt_si_advance_norm_gen(dw->si, fld_inv->fi->number);
|
5032
4037
|
si_norm_file_name(dw->si, file_name, fld_inv->fi->number);
|
5033
|
-
norms_out = dw->store->new_output(dw->store, file_name);
|
4038
|
+
norms_out = dw->store->new_output(dw->store, segm_idx_name, file_name);
|
5034
4039
|
frt_os_write_bytes(norms_out, fld_inv->norms, dw->doc_num);
|
5035
4040
|
frt_os_close(norms_out);
|
5036
4041
|
}
|
5037
4042
|
|
5038
4043
|
/* we'll use the postings Hash's table area to sort the postings as it is
|
5039
4044
|
* going to be zeroset soon anyway */
|
5040
|
-
static FrtPostingList **dw_sort_postings(FrtHash *plists_ht)
|
5041
|
-
{
|
4045
|
+
static FrtPostingList **dw_sort_postings(FrtHash *plists_ht) {
|
5042
4046
|
int i, j;
|
5043
4047
|
FrtHashEntry *he;
|
5044
4048
|
FrtPostingList **plists = (FrtPostingList **)plists_ht->table;
|
@@ -5056,8 +4060,7 @@ static FrtPostingList **dw_sort_postings(FrtHash *plists_ht)
|
|
5056
4060
|
return plists;
|
5057
4061
|
}
|
5058
4062
|
|
5059
|
-
static void dw_flush_streams(FrtDocWriter *dw)
|
5060
|
-
{
|
4063
|
+
static void dw_flush_streams(FrtDocWriter *dw) {
|
5061
4064
|
frt_mp_reset(dw->mp);
|
5062
4065
|
frt_fw_close(dw->fw);
|
5063
4066
|
dw->fw = NULL;
|
@@ -5065,8 +4068,7 @@ static void dw_flush_streams(FrtDocWriter *dw)
|
|
5065
4068
|
dw->doc_num = 0;
|
5066
4069
|
}
|
5067
4070
|
|
5068
|
-
static void dw_flush(FrtDocWriter *dw)
|
5069
|
-
{
|
4071
|
+
static void dw_flush(FrtDocWriter *dw) {
|
5070
4072
|
int i, j, last_doc, doc_code, doc_freq, last_pos, posting_count;
|
5071
4073
|
int skip_interval = dw->skip_interval;
|
5072
4074
|
FrtFieldInfos *fis = dw->fis;
|
@@ -5084,9 +4086,9 @@ static void dw_flush(FrtDocWriter *dw)
|
|
5084
4086
|
SkipBuffer *skip_buf;
|
5085
4087
|
|
5086
4088
|
sprintf(file_name, "%s.frq", dw->si->name);
|
5087
|
-
frq_out = store->new_output(store, file_name);
|
4089
|
+
frq_out = store->new_output(store, segm_idx_name, file_name);
|
5088
4090
|
sprintf(file_name, "%s.prx", dw->si->name);
|
5089
|
-
prx_out = store->new_output(store, file_name);
|
4091
|
+
prx_out = store->new_output(store, segm_idx_name, file_name);
|
5090
4092
|
skip_buf = skip_buf_new(frq_out, prx_out);
|
5091
4093
|
|
5092
4094
|
for (i = 0; i < fields_count; i++) {
|
@@ -5120,8 +4122,7 @@ static void dw_flush(FrtDocWriter *dw)
|
|
5120
4122
|
|
5121
4123
|
if (p->freq == 1) {
|
5122
4124
|
frt_os_write_vint(frq_out, 1|doc_code);
|
5123
|
-
}
|
5124
|
-
else {
|
4125
|
+
} else {
|
5125
4126
|
frt_os_write_vint(frq_out, doc_code);
|
5126
4127
|
frt_os_write_vint(frq_out, p->freq);
|
5127
4128
|
}
|
@@ -5144,8 +4145,7 @@ static void dw_flush(FrtDocWriter *dw)
|
|
5144
4145
|
dw_flush_streams(dw);
|
5145
4146
|
}
|
5146
4147
|
|
5147
|
-
FrtDocWriter *frt_dw_open(FrtIndexWriter *iw, FrtSegmentInfo *si)
|
5148
|
-
{
|
4148
|
+
FrtDocWriter *frt_dw_open(FrtIndexWriter *iw, FrtSegmentInfo *si) {
|
5149
4149
|
FrtStore *store = iw->store;
|
5150
4150
|
FrtMemoryPool *mp = frt_mp_new_capa(iw->config.chunk_size,
|
5151
4151
|
iw->config.max_buffer_memory/iw->config.chunk_size);
|
@@ -5177,14 +4177,12 @@ FrtDocWriter *frt_dw_open(FrtIndexWriter *iw, FrtSegmentInfo *si)
|
|
5177
4177
|
return dw;
|
5178
4178
|
}
|
5179
4179
|
|
5180
|
-
void frt_dw_new_segment(FrtDocWriter *dw, FrtSegmentInfo *si)
|
5181
|
-
{
|
4180
|
+
void frt_dw_new_segment(FrtDocWriter *dw, FrtSegmentInfo *si) {
|
5182
4181
|
dw->fw = frt_fw_open(dw->store, si->name, dw->fis);
|
5183
4182
|
dw->si = si;
|
5184
4183
|
}
|
5185
4184
|
|
5186
|
-
void frt_dw_close(FrtDocWriter *dw)
|
5187
|
-
{
|
4185
|
+
void frt_dw_close(FrtDocWriter *dw) {
|
5188
4186
|
if (dw->doc_num) {
|
5189
4187
|
dw_flush(dw);
|
5190
4188
|
}
|
@@ -5199,8 +4197,7 @@ void frt_dw_close(FrtDocWriter *dw)
|
|
5199
4197
|
free(dw);
|
5200
4198
|
}
|
5201
4199
|
|
5202
|
-
FrtFieldInverter *frt_dw_get_fld_inv(FrtDocWriter *dw, FrtFieldInfo *fi)
|
5203
|
-
{
|
4200
|
+
FrtFieldInverter *frt_dw_get_fld_inv(FrtDocWriter *dw, FrtFieldInfo *fi) {
|
5204
4201
|
FrtFieldInverter *fld_inv = (FrtFieldInverter*)frt_h_get_int(dw->fields, fi->number);
|
5205
4202
|
|
5206
4203
|
if (!fld_inv) {
|
@@ -5216,8 +4213,7 @@ static void dw_add_posting(FrtMemoryPool *mp,
|
|
5216
4213
|
int doc_num,
|
5217
4214
|
const char *text,
|
5218
4215
|
int len,
|
5219
|
-
int pos)
|
5220
|
-
{
|
4216
|
+
int pos) {
|
5221
4217
|
FrtHashEntry *pl_he;
|
5222
4218
|
if (frt_h_set_ext(curr_plists, text, &pl_he)) {
|
5223
4219
|
FrtPosting *p = frt_p_new(mp, doc_num, pos);
|
@@ -5227,21 +4223,18 @@ static void dw_add_posting(FrtMemoryPool *mp,
|
|
5227
4223
|
if (frt_h_set_ext(fld_plists, text, &fld_pl_he)) {
|
5228
4224
|
fld_pl_he->value = pl = frt_pl_new(mp, text, len, p);
|
5229
4225
|
pl_he->key = fld_pl_he->key = (char *)pl->term;
|
5230
|
-
}
|
5231
|
-
else {
|
4226
|
+
} else {
|
5232
4227
|
pl = (FrtPostingList *)fld_pl_he->value;
|
5233
4228
|
pl_add_posting(pl, p);
|
5234
4229
|
pl_he->key = (char *)pl->term;
|
5235
4230
|
}
|
5236
4231
|
pl_he->value = pl;
|
5237
|
-
}
|
5238
|
-
else {
|
4232
|
+
} else {
|
5239
4233
|
frt_pl_add_occ(mp, (FrtPostingList *)pl_he->value, pos);
|
5240
4234
|
}
|
5241
4235
|
}
|
5242
4236
|
|
5243
|
-
static void dw_add_offsets(FrtDocWriter *dw, int pos, frt_off_t start, frt_off_t end)
|
5244
|
-
{
|
4237
|
+
static void dw_add_offsets(FrtDocWriter *dw, int pos, frt_off_t start, frt_off_t end) {
|
5245
4238
|
if (pos >= dw->offsets_capa) {
|
5246
4239
|
int old_capa = dw->offsets_capa;
|
5247
4240
|
while (pos >= dw->offsets_capa) {
|
@@ -5305,7 +4298,7 @@ FrtHash *frt_dw_invert_field(FrtDocWriter *dw, FrtFieldInverter *fld_inv, FrtDoc
|
|
5305
4298
|
buf[FRT_MAX_WORD_SIZE - 1] = '\0';
|
5306
4299
|
for (i = 0; i < df_size; i++) {
|
5307
4300
|
int len = df->lengths[i];
|
5308
|
-
char *data_ptr = df->data[i];
|
4301
|
+
const char *data_ptr = df->data[i];
|
5309
4302
|
if (len >= FRT_MAX_WORD_SIZE) {
|
5310
4303
|
char *head_last = rb_enc_left_char_head(data_ptr, data_ptr + FRT_MAX_WORD_SIZE - 1, data_ptr + len, df->encodings[i]);
|
5311
4304
|
len = head_last - data_ptr;
|
@@ -5347,7 +4340,7 @@ void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc) {
|
|
5347
4340
|
FrtFieldInverter *fld_inv;
|
5348
4341
|
FrtHash *postings;
|
5349
4342
|
FrtFieldInfo *fi;
|
5350
|
-
const int doc_size = doc->
|
4343
|
+
const int doc_size = doc->field_count;
|
5351
4344
|
|
5352
4345
|
/* frt_fw_add_doc will add new fields as necessary */
|
5353
4346
|
frt_fw_add_doc(dw->fw, doc);
|
@@ -5384,484 +4377,23 @@ void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc) {
|
|
5384
4377
|
* IndexWriter
|
5385
4378
|
*
|
5386
4379
|
****************************************************************************/
|
5387
|
-
/****************************************************************************
|
5388
|
-
* SegmentMergeInfo
|
5389
|
-
****************************************************************************/
|
5390
|
-
|
5391
|
-
typedef struct SegmentMergeInfo {
|
5392
|
-
int base;
|
5393
|
-
int max_doc;
|
5394
|
-
int doc_cnt;
|
5395
|
-
FrtSegmentInfo *si;
|
5396
|
-
FrtStore *store;
|
5397
|
-
FrtStore *orig_store;
|
5398
|
-
FrtBitVector *deleted_docs;
|
5399
|
-
FrtSegmentFieldIndex *sfi;
|
5400
|
-
FrtTermEnum *te;
|
5401
|
-
FrtTermDocEnum *tde;
|
5402
|
-
char *term;
|
5403
|
-
int *doc_map;
|
5404
|
-
FrtInStream *frq_in;
|
5405
|
-
FrtInStream *prx_in;
|
5406
|
-
} SegmentMergeInfo;
|
5407
|
-
|
5408
|
-
static bool smi_lt(const SegmentMergeInfo *smi1, const SegmentMergeInfo *smi2)
|
5409
|
-
{
|
5410
|
-
int cmpres = strcmp(smi1->term, smi2->term);
|
5411
|
-
if (0 == cmpres) {
|
5412
|
-
return smi1->base < smi2->base;
|
5413
|
-
}
|
5414
|
-
else {
|
5415
|
-
return cmpres < 0;
|
5416
|
-
}
|
5417
|
-
}
|
5418
|
-
|
5419
|
-
static void smi_load_doc_map(SegmentMergeInfo *smi)
|
5420
|
-
{
|
5421
|
-
FrtBitVector *deleted_docs = smi->deleted_docs;
|
5422
|
-
const int max_doc = smi->max_doc;
|
5423
|
-
int j = 0, i;
|
5424
|
-
|
5425
|
-
smi->doc_map = FRT_ALLOC_N(int, max_doc);
|
5426
|
-
for (i = 0; i < max_doc; i++) {
|
5427
|
-
if (frt_bv_get(deleted_docs, i)) {
|
5428
|
-
smi->doc_map[i] = -1;
|
5429
|
-
}
|
5430
|
-
else {
|
5431
|
-
smi->doc_map[i] = j++;
|
5432
|
-
}
|
5433
|
-
}
|
5434
|
-
smi->doc_cnt = j;
|
5435
|
-
}
|
5436
|
-
|
5437
|
-
static SegmentMergeInfo *smi_new(int base, FrtStore *store, FrtSegmentInfo *si)
|
5438
|
-
{
|
5439
|
-
SegmentMergeInfo *smi = FRT_ALLOC_AND_ZERO(SegmentMergeInfo);
|
5440
|
-
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
5441
|
-
char *segment = si->name;
|
5442
|
-
smi->base = base;
|
5443
|
-
smi->si = si;
|
5444
|
-
smi->orig_store = smi->store = store;
|
5445
|
-
FRT_REF(smi->orig_store);
|
5446
|
-
|
5447
|
-
sprintf(file_name, "%s.fdx", segment);
|
5448
|
-
smi->doc_cnt = smi->max_doc
|
5449
|
-
= smi->store->length(smi->store, file_name) / FIELDS_IDX_PTR_SIZE;
|
5450
|
-
|
5451
|
-
if (si->del_gen >= 0) {
|
5452
|
-
frt_fn_for_generation(file_name, segment, "del", si->del_gen);
|
5453
|
-
smi->deleted_docs = bv_read(store, file_name);
|
5454
|
-
smi_load_doc_map(smi);
|
5455
|
-
}
|
5456
|
-
return smi;
|
5457
|
-
}
|
5458
|
-
|
5459
|
-
static void smi_load_term_input(SegmentMergeInfo *smi)
|
5460
|
-
{
|
5461
|
-
FrtStore *store = smi->store;
|
5462
|
-
char *segment = smi->si->name;
|
5463
|
-
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
5464
|
-
smi->sfi = frt_sfi_open(store, segment);
|
5465
|
-
sprintf(file_name, "%s.tis", segment);
|
5466
|
-
FrtInStream *is = store->open_input(store, file_name);
|
5467
|
-
FRT_DEREF(is);
|
5468
|
-
smi->te = TE(frt_ste_new(is, smi->sfi));
|
5469
|
-
sprintf(file_name, "%s.frq", segment);
|
5470
|
-
smi->frq_in = store->open_input(store, file_name);
|
5471
|
-
sprintf(file_name, "%s.prx", segment);
|
5472
|
-
smi->prx_in = store->open_input(store, file_name);
|
5473
|
-
smi->tde = frt_stpe_new(NULL, smi->frq_in, smi->prx_in, smi->deleted_docs,
|
5474
|
-
STE(smi->te)->skip_interval);
|
5475
|
-
}
|
5476
|
-
|
5477
|
-
static void smi_close_term_input(SegmentMergeInfo *smi)
|
5478
|
-
{
|
5479
|
-
frt_ste_close(smi->te);
|
5480
|
-
frt_sfi_close(smi->sfi);
|
5481
|
-
stpe_close(smi->tde);
|
5482
|
-
frt_is_close(smi->frq_in);
|
5483
|
-
frt_is_close(smi->prx_in);
|
5484
|
-
}
|
5485
|
-
|
5486
|
-
static void smi_destroy(SegmentMergeInfo *smi)
|
5487
|
-
{
|
5488
|
-
if (smi->store != smi->orig_store) {
|
5489
|
-
frt_store_close(smi->store);
|
5490
|
-
}
|
5491
|
-
frt_store_close(smi->orig_store);
|
5492
|
-
if (smi->deleted_docs) {
|
5493
|
-
frt_bv_destroy(smi->deleted_docs);
|
5494
|
-
free(smi->doc_map);
|
5495
|
-
}
|
5496
|
-
free(smi);
|
5497
|
-
}
|
5498
|
-
|
5499
|
-
static char *smi_next(SegmentMergeInfo *smi)
|
5500
|
-
{
|
5501
|
-
return (smi->term = ste_next(smi->te));
|
5502
|
-
}
|
5503
|
-
|
5504
|
-
/****************************************************************************
|
5505
|
-
* SegmentMerger
|
5506
|
-
****************************************************************************/
|
5507
|
-
|
5508
|
-
typedef struct SegmentMerger {
|
5509
|
-
FrtTermInfo ti;
|
5510
|
-
FrtStore *store;
|
5511
|
-
FrtFieldInfos *fis;
|
5512
|
-
FrtSegmentInfo *si;
|
5513
|
-
SegmentMergeInfo **smis;
|
5514
|
-
int seg_cnt;
|
5515
|
-
int doc_cnt;
|
5516
|
-
FrtConfig *config;
|
5517
|
-
FrtTermInfosWriter *tiw;
|
5518
|
-
char *term_buf;
|
5519
|
-
int term_buf_ptr;
|
5520
|
-
int term_buf_size;
|
5521
|
-
FrtPriorityQueue *queue;
|
5522
|
-
SkipBuffer *skip_buf;
|
5523
|
-
FrtOutStream *frq_out;
|
5524
|
-
FrtOutStream *prx_out;
|
5525
|
-
} SegmentMerger;
|
5526
|
-
|
5527
|
-
static SegmentMerger *sm_create(FrtIndexWriter *iw, FrtSegmentInfo *si, FrtSegmentInfo **seg_infos, const int seg_cnt)
|
5528
|
-
{
|
5529
|
-
int i;
|
5530
|
-
SegmentMerger *sm = FRT_ALLOC_AND_ZERO_N(SegmentMerger, seg_cnt);
|
5531
|
-
sm->store = iw->store;
|
5532
|
-
FRT_REF(sm->store);
|
5533
|
-
sm->fis = iw->fis;
|
5534
|
-
sm->si = si;
|
5535
|
-
sm->doc_cnt = 0;
|
5536
|
-
sm->smis = FRT_ALLOC_N(SegmentMergeInfo *, seg_cnt);
|
5537
|
-
for (i = 0; i < seg_cnt; i++) {
|
5538
|
-
sm->smis[i] = smi_new(sm->doc_cnt, seg_infos[i]->store, seg_infos[i]);
|
5539
|
-
sm->doc_cnt += sm->smis[i]->doc_cnt;
|
5540
|
-
}
|
5541
|
-
sm->seg_cnt = seg_cnt;
|
5542
|
-
sm->config = &iw->config;
|
5543
|
-
return sm;
|
5544
|
-
}
|
5545
|
-
|
5546
|
-
static void sm_destroy(SegmentMerger *sm)
|
5547
|
-
{
|
5548
|
-
int i;
|
5549
|
-
const int seg_cnt = sm->seg_cnt;
|
5550
|
-
for (i = 0; i < seg_cnt; i++) {
|
5551
|
-
smi_destroy(sm->smis[i]);
|
5552
|
-
}
|
5553
|
-
frt_store_close(sm->store);
|
5554
|
-
free(sm->smis);
|
5555
|
-
free(sm);
|
5556
|
-
}
|
5557
|
-
|
5558
|
-
static void sm_merge_fields(SegmentMerger *sm)
|
5559
|
-
{
|
5560
|
-
int i, j;
|
5561
|
-
frt_off_t start, end = 0;
|
5562
|
-
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
5563
|
-
FrtOutStream *fdt_out, *fdx_out;
|
5564
|
-
FrtStore *store = sm->store;
|
5565
|
-
const int seg_cnt = sm->seg_cnt;
|
5566
|
-
|
5567
|
-
sprintf(file_name, "%s.fdt", sm->si->name);
|
5568
|
-
fdt_out = store->new_output(store, file_name);
|
5569
|
-
|
5570
|
-
sprintf(file_name, "%s.fdx", sm->si->name);
|
5571
|
-
fdx_out = store->new_output(store, file_name);
|
5572
|
-
|
5573
|
-
for (i = 0; i < seg_cnt; i++) {
|
5574
|
-
SegmentMergeInfo *smi = sm->smis[i];
|
5575
|
-
const int max_doc = smi->max_doc;
|
5576
|
-
FrtInStream *fdt_in, *fdx_in;
|
5577
|
-
char *segment = smi->si->name;
|
5578
|
-
store = smi->store;
|
5579
|
-
sprintf(file_name, "%s.fdt", segment);
|
5580
|
-
fdt_in = store->open_input(store, file_name);
|
5581
|
-
sprintf(file_name, "%s.fdx", segment);
|
5582
|
-
fdx_in = store->open_input(store, file_name);
|
5583
|
-
|
5584
|
-
if (max_doc > 0) {
|
5585
|
-
end = (off_t)frt_is_read_u64(fdx_in);
|
5586
|
-
}
|
5587
|
-
for (j = 0; j < max_doc; j++) {
|
5588
|
-
frt_u32 tv_idx_offset = frt_is_read_u32(fdx_in);
|
5589
|
-
start = end;
|
5590
|
-
if (j == max_doc - 1) {
|
5591
|
-
end = frt_is_length(fdt_in);
|
5592
|
-
}
|
5593
|
-
else {
|
5594
|
-
end = (off_t)frt_is_read_u64(fdx_in);
|
5595
|
-
}
|
5596
|
-
/* skip deleted docs */
|
5597
|
-
if (!smi->deleted_docs || !frt_bv_get(smi->deleted_docs, j)) {
|
5598
|
-
frt_os_write_u64(fdx_out, frt_os_pos(fdt_out));
|
5599
|
-
frt_os_write_u32(fdx_out, tv_idx_offset);
|
5600
|
-
frt_is_seek(fdt_in, start);
|
5601
|
-
frt_is2os_copy_bytes(fdt_in, fdt_out, end - start);
|
5602
|
-
}
|
5603
|
-
}
|
5604
|
-
frt_is_close(fdt_in);
|
5605
|
-
frt_is_close(fdx_in);
|
5606
|
-
}
|
5607
|
-
frt_os_close(fdt_out);
|
5608
|
-
frt_os_close(fdx_out);
|
5609
|
-
}
|
5610
|
-
|
5611
|
-
static int sm_append_postings(SegmentMerger *sm, SegmentMergeInfo **matches,
|
5612
|
-
const int match_size)
|
5613
|
-
{
|
5614
|
-
int i;
|
5615
|
-
int last_doc = 0, base, doc, doc_code, freq;
|
5616
|
-
int skip_interval = sm->config->skip_interval;
|
5617
|
-
int *doc_map = NULL;
|
5618
|
-
int df = 0; /* number of docs w/ term */
|
5619
|
-
FrtTermDocEnum *tde;
|
5620
|
-
SegmentMergeInfo *smi;
|
5621
|
-
SkipBuffer *skip_buf = sm->skip_buf;
|
5622
|
-
skip_buf_reset(skip_buf);
|
5623
|
-
|
5624
|
-
for (i = 0; i < match_size; i++) {
|
5625
|
-
smi = matches[i];
|
5626
|
-
base = smi->base;
|
5627
|
-
doc_map = smi->doc_map;
|
5628
|
-
tde = smi->tde;
|
5629
|
-
stpe_seek_ti(STDE(tde), &smi->te->curr_ti);
|
5630
|
-
|
5631
|
-
/* since we are using copy_bytes below to copy the proximities we use
|
5632
|
-
* stde_next rather than stpe_next here */
|
5633
|
-
while (stde_next(tde)) {
|
5634
|
-
doc = stde_doc_num(tde);
|
5635
|
-
if (NULL != doc_map) {
|
5636
|
-
doc = doc_map[doc]; /* work around deletions */
|
5637
|
-
}
|
5638
|
-
doc += base; /* convert to merged space */
|
5639
|
-
assert(doc == 0 || doc > last_doc);
|
5640
|
-
|
5641
|
-
df++;
|
5642
|
-
if (0 == (df % skip_interval)) {
|
5643
|
-
skip_buf_add(skip_buf, last_doc);
|
5644
|
-
}
|
5645
|
-
|
5646
|
-
doc_code = (doc - last_doc) << 1; /* use low bit to flag freq=1 */
|
5647
|
-
last_doc = doc;
|
5648
|
-
|
5649
|
-
freq = stde_freq(tde);
|
5650
|
-
if (freq == 1) {
|
5651
|
-
frt_os_write_vint(sm->frq_out, doc_code | 1); /* doc & freq=1 */
|
5652
|
-
}
|
5653
|
-
else {
|
5654
|
-
frt_os_write_vint(sm->frq_out, doc_code); /* write doc */
|
5655
|
-
frt_os_write_vint(sm->frq_out, freq); /* write freqency in doc */
|
5656
|
-
}
|
5657
|
-
|
5658
|
-
/* copy position deltas */
|
5659
|
-
frt_is2os_copy_vints(STDE(tde)->prx_in, sm->prx_out, freq);
|
5660
|
-
}
|
5661
|
-
}
|
5662
|
-
return df;
|
5663
|
-
}
|
5664
|
-
|
5665
|
-
static char *sm_cache_term(SegmentMerger *sm, char *term, int term_len)
|
5666
|
-
{
|
5667
|
-
term = (char *)memcpy(sm->term_buf + sm->term_buf_ptr, term, term_len + 1);
|
5668
|
-
sm->term_buf_ptr += term_len + 1;
|
5669
|
-
if (sm->term_buf_ptr > sm->term_buf_size) {
|
5670
|
-
sm->term_buf_ptr = 0;
|
5671
|
-
}
|
5672
|
-
return term;
|
5673
|
-
}
|
5674
|
-
|
5675
|
-
static void sm_merge_term_info(SegmentMerger *sm, SegmentMergeInfo **matches,
|
5676
|
-
int match_size)
|
5677
|
-
{
|
5678
|
-
frt_off_t frq_ptr = frt_os_pos(sm->frq_out);
|
5679
|
-
frt_off_t prx_ptr = frt_os_pos(sm->prx_out);
|
5680
|
-
|
5681
|
-
int df = sm_append_postings(sm, matches, match_size); /* append posting data */
|
5682
|
-
|
5683
|
-
frt_off_t skip_ptr = skip_buf_write(sm->skip_buf);
|
5684
|
-
|
5685
|
-
if (df > 0) {
|
5686
|
-
/* add an entry to the dictionary with ptrs to prox and freq files */
|
5687
|
-
SegmentMergeInfo *first_match = matches[0];
|
5688
|
-
int term_len = first_match->te->curr_term_len;
|
5689
|
-
|
5690
|
-
frt_ti_set(sm->ti, df, frq_ptr, prx_ptr,
|
5691
|
-
(skip_ptr - frq_ptr));
|
5692
|
-
frt_tiw_add(sm->tiw, sm_cache_term(sm, first_match->term, term_len),
|
5693
|
-
term_len, &sm->ti);
|
5694
|
-
}
|
5695
|
-
}
|
5696
|
-
|
5697
|
-
static void sm_merge_term_infos(SegmentMerger *sm)
|
5698
|
-
{
|
5699
|
-
int i, j, match_size;
|
5700
|
-
SegmentMergeInfo *smi, *top, **matches;
|
5701
|
-
char *term;
|
5702
|
-
const int seg_cnt = sm->seg_cnt;
|
5703
|
-
const int fis_size = sm->fis->size;
|
5704
|
-
|
5705
|
-
matches = FRT_ALLOC_N(SegmentMergeInfo *, seg_cnt);
|
5706
|
-
|
5707
|
-
for (j = 0; j < seg_cnt; j++) {
|
5708
|
-
smi_load_term_input(sm->smis[j]);
|
5709
|
-
}
|
5710
|
-
|
5711
|
-
for (i = 0; i < fis_size; i++) {
|
5712
|
-
frt_tiw_start_field(sm->tiw, i);
|
5713
|
-
for (j = 0; j < seg_cnt; j++) {
|
5714
|
-
smi = sm->smis[j];
|
5715
|
-
ste_set_field(smi->te, i);
|
5716
|
-
if (NULL != smi_next(smi)) {
|
5717
|
-
frt_pq_push(sm->queue, smi); /* initialize @queue */
|
5718
|
-
}
|
5719
|
-
}
|
5720
|
-
while (sm->queue->size > 0) {
|
5721
|
-
match_size = 0; /* pop matching terms */
|
5722
|
-
matches[0] = (SegmentMergeInfo *)frt_pq_pop(sm->queue);
|
5723
|
-
match_size++;
|
5724
|
-
term = matches[0]->term;
|
5725
|
-
top = (SegmentMergeInfo *)frt_pq_top(sm->queue);
|
5726
|
-
while ((NULL != top) && (0 == strcmp(term, top->term))) {
|
5727
|
-
matches[match_size] = (SegmentMergeInfo *)frt_pq_pop(sm->queue);
|
5728
|
-
match_size++;
|
5729
|
-
top = (SegmentMergeInfo *)frt_pq_top(sm->queue);
|
5730
|
-
}
|
5731
|
-
|
5732
|
-
sm_merge_term_info(sm, matches, match_size);/* add new FrtTermInfo */
|
5733
|
-
|
5734
|
-
while (match_size > 0) {
|
5735
|
-
match_size--;
|
5736
|
-
smi = matches[match_size];
|
5737
|
-
if (NULL != smi_next(smi)) {
|
5738
|
-
frt_pq_push(sm->queue, smi); /* restore queue */
|
5739
|
-
}
|
5740
|
-
}
|
5741
|
-
}
|
5742
|
-
}
|
5743
|
-
free(matches);
|
5744
|
-
for (j = 0; j < seg_cnt; j++) {
|
5745
|
-
smi_close_term_input(sm->smis[j]);
|
5746
|
-
}
|
5747
|
-
}
|
5748
|
-
|
5749
|
-
static void sm_merge_terms(SegmentMerger *sm)
|
5750
|
-
{
|
5751
|
-
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
5752
|
-
|
5753
|
-
sprintf(file_name, "%s.frq", sm->si->name);
|
5754
|
-
sm->frq_out = sm->store->new_output(sm->store, file_name);
|
5755
|
-
sprintf(file_name, "%s.prx", sm->si->name);
|
5756
|
-
sm->prx_out = sm->store->new_output(sm->store, file_name);
|
5757
|
-
|
5758
|
-
sm->tiw = frt_tiw_open(sm->store, sm->si->name, sm->config->index_interval,
|
5759
|
-
sm->config->skip_interval);
|
5760
|
-
sm->skip_buf = skip_buf_new(sm->frq_out, sm->prx_out);
|
5761
|
-
|
5762
|
-
/* terms_buf_ptr holds a buffer of terms since the FrtTermInfosWriter needs
|
5763
|
-
* to keep the last index_interval terms so that it can compare the last
|
5764
|
-
* term put in the index with the next one. So the size of the buffer must
|
5765
|
-
* by index_interval + 2. */
|
5766
|
-
sm->term_buf_ptr = 0;
|
5767
|
-
sm->term_buf_size = (sm->config->index_interval + 1) * FRT_MAX_WORD_SIZE;
|
5768
|
-
sm->term_buf = FRT_ALLOC_N(char, sm->term_buf_size + FRT_MAX_WORD_SIZE);
|
5769
|
-
|
5770
|
-
sm->queue = frt_pq_new(sm->seg_cnt, (frt_lt_ft)&smi_lt, NULL);
|
5771
|
-
|
5772
|
-
sm_merge_term_infos(sm);
|
5773
|
-
|
5774
|
-
frt_os_close(sm->frq_out);
|
5775
|
-
frt_os_close(sm->prx_out);
|
5776
|
-
frt_tiw_close(sm->tiw);
|
5777
|
-
frt_pq_destroy(sm->queue);
|
5778
|
-
skip_buf_destroy(sm->skip_buf);
|
5779
|
-
free(sm->term_buf);
|
5780
|
-
}
|
5781
|
-
|
5782
|
-
static void sm_merge_norms(SegmentMerger *sm)
|
5783
|
-
{
|
5784
|
-
FrtSegmentInfo *si;
|
5785
|
-
int i, j, k;
|
5786
|
-
FrtStore *store;
|
5787
|
-
frt_uchar byte;
|
5788
|
-
FrtFieldInfo *fi;
|
5789
|
-
FrtOutStream *os;
|
5790
|
-
FrtInStream *is;
|
5791
|
-
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
5792
|
-
SegmentMergeInfo *smi;
|
5793
|
-
const int seg_cnt = sm->seg_cnt;
|
5794
|
-
for (i = sm->fis->size - 1; i >= 0; i--) {
|
5795
|
-
fi = sm->fis->fields[i];
|
5796
|
-
if (bits_has_norms(fi->bits)) {
|
5797
|
-
si = sm->si;
|
5798
|
-
frt_si_advance_norm_gen(si, i);
|
5799
|
-
si_norm_file_name(si, file_name, i);
|
5800
|
-
os = sm->store->new_output(sm->store, file_name);
|
5801
|
-
for (j = 0; j < seg_cnt; j++) {
|
5802
|
-
smi = sm->smis[j];
|
5803
|
-
si = smi->si;
|
5804
|
-
if (si_norm_file_name(si, file_name, i)) {
|
5805
|
-
const int max_doc = smi->max_doc;
|
5806
|
-
FrtBitVector *deleted_docs = smi->deleted_docs;
|
5807
|
-
store = smi->store;
|
5808
|
-
is = store->open_input(store, file_name);
|
5809
|
-
if (deleted_docs) {
|
5810
|
-
for (k = 0; k < max_doc; k++) {
|
5811
|
-
byte = frt_is_read_byte(is);
|
5812
|
-
if (!frt_bv_get(deleted_docs, k)) {
|
5813
|
-
frt_os_write_byte(os, byte);
|
5814
|
-
}
|
5815
|
-
}
|
5816
|
-
}
|
5817
|
-
else {
|
5818
|
-
frt_is2os_copy_bytes(is, os, max_doc);
|
5819
|
-
}
|
5820
|
-
frt_is_close(is);
|
5821
|
-
}
|
5822
|
-
else {
|
5823
|
-
const int doc_cnt = smi->doc_cnt;
|
5824
|
-
for (k = 0; k < doc_cnt; k++) {
|
5825
|
-
frt_os_write_byte(os, '\0');
|
5826
|
-
}
|
5827
|
-
}
|
5828
|
-
}
|
5829
|
-
frt_os_close(os);
|
5830
|
-
}
|
5831
|
-
}
|
5832
|
-
}
|
5833
|
-
|
5834
|
-
static int sm_merge(SegmentMerger *sm)
|
5835
|
-
{
|
5836
|
-
sm_merge_fields(sm);
|
5837
|
-
sm_merge_terms(sm);
|
5838
|
-
sm_merge_norms(sm);
|
5839
|
-
return sm->doc_cnt;
|
5840
|
-
}
|
5841
|
-
|
5842
|
-
|
5843
|
-
/****************************************************************************
|
5844
|
-
* IndexWriter
|
5845
|
-
****************************************************************************/
|
5846
4380
|
|
5847
4381
|
/* prepare an index ready for writing */
|
5848
|
-
void frt_index_create(FrtStore *store, FrtFieldInfos *fis)
|
5849
|
-
{
|
4382
|
+
void frt_index_create(FrtStore *store, FrtFieldInfos *fis) {
|
5850
4383
|
FrtSegmentInfos *sis = frt_sis_new(fis);
|
5851
|
-
store->clear_all(store);
|
4384
|
+
store->clear_all(store, segm_idx_name);
|
5852
4385
|
frt_sis_write(sis, store, NULL);
|
5853
4386
|
frt_sis_destroy(sis);
|
5854
4387
|
}
|
5855
4388
|
|
5856
4389
|
bool frt_index_is_locked(FrtStore *store) {
|
5857
|
-
FrtLock *write_lock = frt_open_lock(store, FRT_WRITE_LOCK_NAME);
|
4390
|
+
FrtLock *write_lock = frt_open_lock(store, segm_idx_name, FRT_WRITE_LOCK_NAME);
|
5858
4391
|
bool is_locked = write_lock->is_locked(write_lock);
|
5859
4392
|
frt_close_lock(write_lock);
|
5860
4393
|
return is_locked;
|
5861
4394
|
}
|
5862
4395
|
|
5863
|
-
int frt_iw_doc_count(FrtIndexWriter *iw)
|
5864
|
-
{
|
4396
|
+
int frt_iw_doc_count(FrtIndexWriter *iw) {
|
5865
4397
|
int i, doc_cnt = 0;
|
5866
4398
|
pthread_mutex_lock(&iw->mutex);
|
5867
4399
|
for (i = iw->sis->size - 1; i >= 0; i--) {
|
@@ -5874,68 +4406,6 @@ int frt_iw_doc_count(FrtIndexWriter *iw)
|
|
5874
4406
|
return doc_cnt;
|
5875
4407
|
}
|
5876
4408
|
|
5877
|
-
static void iw_merge_segments(FrtIndexWriter *iw, const int min_seg, const int max_seg) {
|
5878
|
-
int i;
|
5879
|
-
FrtSegmentInfos *sis = iw->sis;
|
5880
|
-
FrtSegmentInfo *si = frt_sis_new_segment(sis, 0, iw->store);
|
5881
|
-
|
5882
|
-
SegmentMerger *merger = sm_create(iw, si, &sis->segs[min_seg], max_seg - min_seg);
|
5883
|
-
|
5884
|
-
/* This is where all the action happens. */
|
5885
|
-
si->doc_cnt = sm_merge(merger);
|
5886
|
-
|
5887
|
-
pthread_mutex_lock(&iw->store->mutex);
|
5888
|
-
/* delete merged segments */
|
5889
|
-
for (i = min_seg; i < max_seg; i++) {
|
5890
|
-
si_delete_files(sis->segs[i], iw->fis, iw->deleter);
|
5891
|
-
}
|
5892
|
-
|
5893
|
-
frt_sis_del_from_to(sis, min_seg, max_seg);
|
5894
|
-
|
5895
|
-
frt_sis_write(sis, iw->store, iw->deleter);
|
5896
|
-
deleter_commit_pending_deletions(iw->deleter);
|
5897
|
-
|
5898
|
-
pthread_mutex_unlock(&iw->store->mutex);
|
5899
|
-
|
5900
|
-
sm_destroy(merger);
|
5901
|
-
}
|
5902
|
-
|
5903
|
-
static void iw_merge_segments_from(FrtIndexWriter *iw, int min_segment)
|
5904
|
-
{
|
5905
|
-
iw_merge_segments(iw, min_segment, iw->sis->size);
|
5906
|
-
}
|
5907
|
-
|
5908
|
-
static void iw_maybe_merge_segments(FrtIndexWriter *iw)
|
5909
|
-
{
|
5910
|
-
int target_merge_docs = iw->config.merge_factor;
|
5911
|
-
int min_segment, merge_docs;
|
5912
|
-
FrtSegmentInfo *si;
|
5913
|
-
|
5914
|
-
while (target_merge_docs > 0
|
5915
|
-
&& target_merge_docs <= iw->config.max_merge_docs) {
|
5916
|
-
/* find segments smaller than current target size */
|
5917
|
-
min_segment = iw->sis->size - 1;
|
5918
|
-
merge_docs = 0;
|
5919
|
-
while (min_segment >= 0) {
|
5920
|
-
si = iw->sis->segs[min_segment];
|
5921
|
-
if (si->doc_cnt >= target_merge_docs) {
|
5922
|
-
break;
|
5923
|
-
}
|
5924
|
-
merge_docs += si->doc_cnt;
|
5925
|
-
min_segment--;
|
5926
|
-
}
|
5927
|
-
|
5928
|
-
if (merge_docs >= target_merge_docs) { /* found a merge to do */
|
5929
|
-
iw_merge_segments_from(iw, min_segment + 1);
|
5930
|
-
}
|
5931
|
-
else if (min_segment <= 0) {
|
5932
|
-
break;
|
5933
|
-
}
|
5934
|
-
|
5935
|
-
target_merge_docs *= iw->config.merge_factor;
|
5936
|
-
}
|
5937
|
-
}
|
5938
|
-
|
5939
4409
|
static void iw_flush_ram_segment(FrtIndexWriter *iw) {
|
5940
4410
|
FrtSegmentInfos *sis = iw->sis;
|
5941
4411
|
FrtSegmentInfo *si;
|
@@ -5947,16 +4417,13 @@ static void iw_flush_ram_segment(FrtIndexWriter *iw) {
|
|
5947
4417
|
frt_sis_write(iw->sis, iw->store, iw->deleter);
|
5948
4418
|
deleter_commit_pending_deletions(iw->deleter);
|
5949
4419
|
pthread_mutex_unlock(&iw->store->mutex);
|
5950
|
-
// iw_maybe_merge_segments(iw);
|
5951
4420
|
}
|
5952
4421
|
|
5953
|
-
void frt_iw_add_doc(FrtIndexWriter *iw, FrtDocument *doc)
|
5954
|
-
{
|
4422
|
+
void frt_iw_add_doc(FrtIndexWriter *iw, FrtDocument *doc) {
|
5955
4423
|
pthread_mutex_lock(&iw->mutex);
|
5956
4424
|
if (NULL == iw->dw) {
|
5957
4425
|
iw->dw = frt_dw_open(iw, frt_sis_new_segment(iw->sis, 0, iw->store));
|
5958
|
-
}
|
5959
|
-
else if (NULL == iw->dw->fw) {
|
4426
|
+
} else if (NULL == iw->dw->fw) {
|
5960
4427
|
frt_dw_new_segment(iw->dw, frt_sis_new_segment(iw->sis, 0, iw->store));
|
5961
4428
|
}
|
5962
4429
|
frt_dw_add_doc(iw->dw, doc);
|
@@ -5967,15 +4434,13 @@ void frt_iw_add_doc(FrtIndexWriter *iw, FrtDocument *doc)
|
|
5967
4434
|
pthread_mutex_unlock(&iw->mutex);
|
5968
4435
|
}
|
5969
4436
|
|
5970
|
-
static void iw_commit_i(FrtIndexWriter *iw)
|
5971
|
-
{
|
4437
|
+
static void iw_commit_i(FrtIndexWriter *iw) {
|
5972
4438
|
if (iw->dw && iw->dw->doc_num > 0) {
|
5973
4439
|
iw_flush_ram_segment(iw);
|
5974
4440
|
}
|
5975
4441
|
}
|
5976
4442
|
|
5977
|
-
void frt_iw_commit(FrtIndexWriter *iw)
|
5978
|
-
{
|
4443
|
+
void frt_iw_commit(FrtIndexWriter *iw) {
|
5979
4444
|
pthread_mutex_lock(&iw->mutex);
|
5980
4445
|
iw_commit_i(iw);
|
5981
4446
|
pthread_mutex_unlock(&iw->mutex);
|
@@ -6051,28 +4516,7 @@ void frt_iw_delete_terms(FrtIndexWriter *iw, ID field, char **terms, const int t
|
|
6051
4516
|
}
|
6052
4517
|
}
|
6053
4518
|
|
6054
|
-
|
6055
|
-
{
|
6056
|
-
int min_segment;
|
6057
|
-
iw_commit_i(iw);
|
6058
|
-
// while (iw->sis->size > 1
|
6059
|
-
// || (iw->sis->size == 1
|
6060
|
-
// && (frt_si_has_deletions(iw->sis->segs[0])
|
6061
|
-
// || (iw->sis->segs[0]->store != iw->store)))) {
|
6062
|
-
// min_segment = iw->sis->size - iw->config.merge_factor;
|
6063
|
-
// iw_merge_segments_from(iw, min_segment < 0 ? 0 : min_segment);
|
6064
|
-
// }
|
6065
|
-
}
|
6066
|
-
|
6067
|
-
void frt_iw_optimize(FrtIndexWriter *iw)
|
6068
|
-
{
|
6069
|
-
pthread_mutex_lock(&iw->mutex);
|
6070
|
-
iw_optimize_i(iw);
|
6071
|
-
pthread_mutex_unlock(&iw->mutex);
|
6072
|
-
}
|
6073
|
-
|
6074
|
-
void frt_iw_close(FrtIndexWriter *iw)
|
6075
|
-
{
|
4519
|
+
void frt_iw_close(FrtIndexWriter *iw) {
|
6076
4520
|
pthread_mutex_lock(&iw->mutex);
|
6077
4521
|
iw_commit_i(iw);
|
6078
4522
|
if (iw->dw) {
|
@@ -6108,7 +4552,7 @@ FrtIndexWriter *frt_iw_open(FrtIndexWriter *iw, FrtStore *store, FrtAnalyzer *vo
|
|
6108
4552
|
iw->config = *config;
|
6109
4553
|
|
6110
4554
|
FRT_TRY
|
6111
|
-
iw->write_lock = frt_open_lock(store, FRT_WRITE_LOCK_NAME);
|
4555
|
+
iw->write_lock = frt_open_lock(store, segm_idx_name, FRT_WRITE_LOCK_NAME);
|
6112
4556
|
if (!iw->write_lock->obtain(iw->write_lock)) {
|
6113
4557
|
FRT_RAISE(FRT_LOCK_ERROR, "Couldn't obtain write lock when opening IndexWriter");
|
6114
4558
|
}
|
@@ -6150,21 +4594,21 @@ static void iw_cp_fields(FrtIndexWriter *iw, FrtSegmentReader *sr, const char *s
|
|
6150
4594
|
char *sr_segment = sr->si->name;
|
6151
4595
|
|
6152
4596
|
sprintf(file_name, "%s.fdt", segment);
|
6153
|
-
fdt_out = store_out->new_output(store_out, file_name);
|
4597
|
+
fdt_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
6154
4598
|
sprintf(file_name, "%s.fdx", segment);
|
6155
|
-
fdx_out = store_out->new_output(store_out, file_name);
|
4599
|
+
fdx_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
6156
4600
|
|
6157
4601
|
sprintf(file_name, "%s.fdt", sr_segment);
|
6158
|
-
fdt_in = store_in->open_input(store_in, file_name);
|
4602
|
+
fdt_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
6159
4603
|
sprintf(file_name, "%s.fdx", sr_segment);
|
6160
|
-
fdx_in = store_in->open_input(store_in, file_name);
|
4604
|
+
fdx_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
6161
4605
|
|
6162
4606
|
sprintf(file_name, "%s.del", sr_segment);
|
6163
|
-
if (store_in->exists(store_in, file_name)) {
|
4607
|
+
if (store_in->exists(store_in, segm_idx_name, file_name)) {
|
6164
4608
|
FrtOutStream *del_out;
|
6165
|
-
FrtInStream *del_in = store_in->open_input(store_in, file_name);
|
4609
|
+
FrtInStream *del_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
6166
4610
|
sprintf(file_name, "%s.del", segment);
|
6167
|
-
del_out = store_out->new_output(store_out, file_name);
|
4611
|
+
del_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
6168
4612
|
frt_is2os_copy_bytes(del_in, del_out, frt_is_length(del_in));
|
6169
4613
|
frt_os_close(del_out);
|
6170
4614
|
frt_is_close(del_in);
|
@@ -6231,8 +4675,7 @@ static void iw_cp_fields(FrtIndexWriter *iw, FrtSegmentReader *sr, const char *s
|
|
6231
4675
|
}
|
6232
4676
|
|
6233
4677
|
static void iw_cp_terms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
6234
|
-
const char *segment, int *map)
|
6235
|
-
{
|
4678
|
+
const char *segment, int *map) {
|
6236
4679
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
6237
4680
|
FrtOutStream *tix_out, *tis_out, *tfx_out, *frq_out, *prx_out;
|
6238
4681
|
FrtInStream *tix_in, *tis_in, *tfx_in, *frq_in, *prx_in;
|
@@ -6241,29 +4684,29 @@ static void iw_cp_terms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
|
6241
4684
|
char *sr_segment = sr->si->name;
|
6242
4685
|
|
6243
4686
|
sprintf(file_name, "%s.tix", segment);
|
6244
|
-
tix_out = store_out->new_output(store_out, file_name);
|
4687
|
+
tix_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
6245
4688
|
sprintf(file_name, "%s.tix", sr_segment);
|
6246
|
-
tix_in = store_in->open_input(store_in, file_name);
|
4689
|
+
tix_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
6247
4690
|
|
6248
4691
|
sprintf(file_name, "%s.tis", segment);
|
6249
|
-
tis_out = store_out->new_output(store_out, file_name);
|
4692
|
+
tis_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
6250
4693
|
sprintf(file_name, "%s.tis", sr_segment);
|
6251
|
-
tis_in = store_in->open_input(store_in, file_name);
|
4694
|
+
tis_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
6252
4695
|
|
6253
4696
|
sprintf(file_name, "%s.tfx", segment);
|
6254
|
-
tfx_out = store_out->new_output(store_out, file_name);
|
4697
|
+
tfx_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
6255
4698
|
sprintf(file_name, "%s.tfx", sr_segment);
|
6256
|
-
tfx_in = store_in->open_input(store_in, file_name);
|
4699
|
+
tfx_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
6257
4700
|
|
6258
4701
|
sprintf(file_name, "%s.frq", segment);
|
6259
|
-
frq_out = store_out->new_output(store_out, file_name);
|
4702
|
+
frq_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
6260
4703
|
sprintf(file_name, "%s.frq", sr_segment);
|
6261
|
-
frq_in = store_in->open_input(store_in, file_name);
|
4704
|
+
frq_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
6262
4705
|
|
6263
4706
|
sprintf(file_name, "%s.prx", segment);
|
6264
|
-
prx_out = store_out->new_output(store_out, file_name);
|
4707
|
+
prx_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
6265
4708
|
sprintf(file_name, "%s.prx", sr_segment);
|
6266
|
-
prx_in = store_in->open_input(store_in, file_name);
|
4709
|
+
prx_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
6267
4710
|
|
6268
4711
|
if (map) {
|
6269
4712
|
int field_cnt = frt_is_read_u32(tfx_in);
|
@@ -6278,8 +4721,7 @@ static void iw_cp_terms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
|
6278
4721
|
frt_os_write_vint(tfx_out, frt_is_read_vint(tfx_in)); /* index size */
|
6279
4722
|
frt_os_write_vint(tfx_out, frt_is_read_vint(tfx_in)); /* dict size */
|
6280
4723
|
}
|
6281
|
-
}
|
6282
|
-
else {
|
4724
|
+
} else {
|
6283
4725
|
frt_is2os_copy_bytes(tfx_in, tfx_out, frt_is_length(tfx_in));
|
6284
4726
|
}
|
6285
4727
|
frt_is2os_copy_bytes(tix_in, tix_out, frt_is_length(tix_in));
|
@@ -6300,8 +4742,7 @@ static void iw_cp_terms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
|
6300
4742
|
}
|
6301
4743
|
|
6302
4744
|
static void iw_cp_norms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
6303
|
-
FrtSegmentInfo *si, int *map)
|
6304
|
-
{
|
4745
|
+
FrtSegmentInfo *si, int *map) {
|
6305
4746
|
int i;
|
6306
4747
|
FrtFieldInfos *fis = IR(sr)->fis;
|
6307
4748
|
const int field_cnt = fis->size;
|
@@ -6317,10 +4758,10 @@ static void iw_cp_norms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
|
6317
4758
|
FrtStore *store = IR(sr)->store;
|
6318
4759
|
int field_num = map ? map[i] : i;
|
6319
4760
|
|
6320
|
-
norms_in = store->open_input(store, file_name_in);
|
4761
|
+
norms_in = store->open_input(store, segm_idx_name, file_name_in);
|
6321
4762
|
frt_si_advance_norm_gen(si, field_num);
|
6322
4763
|
si_norm_file_name(si, file_name_out, field_num);
|
6323
|
-
norms_out = store_out->new_output(store_out, file_name_out);
|
4764
|
+
norms_out = store_out->new_output(store_out, segm_idx_name, file_name_out);
|
6324
4765
|
frt_is2os_copy_bytes(norms_in, norms_out, frt_is_length(norms_in));
|
6325
4766
|
frt_os_close(norms_out);
|
6326
4767
|
frt_is_close(norms_in);
|
@@ -6398,7 +4839,6 @@ static void iw_add_segments(FrtIndexWriter *iw, FrtIndexReader *ir) {
|
|
6398
4839
|
void frt_iw_add_readers(FrtIndexWriter *iw, FrtIndexReader **readers, const int r_cnt) {
|
6399
4840
|
int i;
|
6400
4841
|
pthread_mutex_lock(&iw->mutex);
|
6401
|
-
iw_optimize_i(iw);
|
6402
4842
|
|
6403
4843
|
for (i = 0; i < r_cnt; i++) {
|
6404
4844
|
iw_add_segments(iw, readers[i]);
|
@@ -6410,6 +4850,5 @@ void frt_iw_add_readers(FrtIndexWriter *iw, FrtIndexReader **readers, const int
|
|
6410
4850
|
frt_sis_write(iw->sis, iw->store, iw->deleter);
|
6411
4851
|
pthread_mutex_unlock(&iw->store->mutex);
|
6412
4852
|
|
6413
|
-
iw_optimize_i(iw);
|
6414
4853
|
pthread_mutex_unlock(&iw->mutex);
|
6415
4854
|
}
|