isomorfeus-ferret 0.17.2 → 0.17.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/benchmark.c +9 -20
- data/ext/isomorfeus_ferret_ext/benchmarks_all.h +1 -2
- data/ext/isomorfeus_ferret_ext/bm_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +4 -2
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +3 -2
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +4 -5
- data/ext/isomorfeus_ferret_ext/frb_field_info.c +3 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +118 -125
- data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +14 -16
- data/ext/isomorfeus_ferret_ext/frb_search.c +31 -23
- data/ext/isomorfeus_ferret_ext/frb_store.c +27 -13
- data/ext/isomorfeus_ferret_ext/frb_utils.c +3 -6
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +39 -46
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_array.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +3 -6
- data/ext/isomorfeus_ferret_ext/frt_doc_field.c +87 -0
- data/ext/isomorfeus_ferret_ext/frt_doc_field.h +26 -0
- data/ext/isomorfeus_ferret_ext/frt_document.c +4 -97
- data/ext/isomorfeus_ferret_ext/frt_document.h +2 -27
- data/ext/isomorfeus_ferret_ext/frt_except.c +8 -6
- data/ext/isomorfeus_ferret_ext/frt_except.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +13 -32
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +0 -6
- data/ext/isomorfeus_ferret_ext/frt_field_info.c +69 -0
- data/ext/isomorfeus_ferret_ext/frt_field_info.h +49 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.c +196 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.h +35 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +10 -4
- data/ext/isomorfeus_ferret_ext/frt_global.h +11 -15
- data/ext/isomorfeus_ferret_ext/frt_hash.c +8 -8
- data/ext/isomorfeus_ferret_ext/frt_hash.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_hashset.c +20 -40
- data/ext/isomorfeus_ferret_ext/frt_hashset.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_helper.c +7 -15
- data/ext/isomorfeus_ferret_ext/frt_in_stream.c +35 -45
- data/ext/isomorfeus_ferret_ext/frt_in_stream.h +3 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +20 -38
- data/ext/isomorfeus_ferret_ext/frt_index.c +292 -790
- data/ext/isomorfeus_ferret_ext/frt_index.h +1 -102
- data/ext/isomorfeus_ferret_ext/frt_lang.c +5 -10
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.c +18 -25
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.h +5 -5
- data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +102 -70
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +23 -46
- data/ext/isomorfeus_ferret_ext/frt_multimapper.h +4 -8
- data/ext/isomorfeus_ferret_ext/frt_out_stream.c +31 -43
- data/ext/isomorfeus_ferret_ext/frt_out_stream.h +2 -2
- data/ext/isomorfeus_ferret_ext/frt_posh.c +6 -819
- data/ext/isomorfeus_ferret_ext/frt_posh.h +0 -57
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +85 -171
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +49 -98
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +52 -104
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +6 -12
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +113 -226
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +134 -85
- data/ext/isomorfeus_ferret_ext/frt_search.c +82 -164
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -25
- data/ext/isomorfeus_ferret_ext/frt_store.h +86 -52
- data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_win32.h +5 -10
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +12 -11
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +11 -13
- data/ext/isomorfeus_ferret_ext/lz4.c +422 -195
- data/ext/isomorfeus_ferret_ext/lz4.h +114 -46
- data/ext/isomorfeus_ferret_ext/lz4frame.c +421 -242
- data/ext/isomorfeus_ferret_ext/lz4frame.h +122 -53
- data/ext/isomorfeus_ferret_ext/lz4hc.c +127 -111
- data/ext/isomorfeus_ferret_ext/lz4hc.h +14 -14
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +1 -1
- data/ext/isomorfeus_ferret_ext/mdbx.c +3762 -2526
- data/ext/isomorfeus_ferret_ext/mdbx.h +115 -70
- data/ext/isomorfeus_ferret_ext/test.c +40 -87
- data/ext/isomorfeus_ferret_ext/test.h +3 -6
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -13
- data/ext/isomorfeus_ferret_ext/test_analysis.c +32 -64
- data/ext/isomorfeus_ferret_ext/test_array.c +6 -12
- data/ext/isomorfeus_ferret_ext/test_bitvector.c +12 -24
- data/ext/isomorfeus_ferret_ext/test_document.c +23 -33
- data/ext/isomorfeus_ferret_ext/test_except.c +10 -21
- data/ext/isomorfeus_ferret_ext/test_fields.c +62 -68
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +15 -23
- data/ext/isomorfeus_ferret_ext/test_filter.c +17 -27
- data/ext/isomorfeus_ferret_ext/test_global.c +14 -29
- data/ext/isomorfeus_ferret_ext/test_hash.c +19 -38
- data/ext/isomorfeus_ferret_ext/test_hashset.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_helper.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +16 -28
- data/ext/isomorfeus_ferret_ext/test_index.c +277 -487
- data/ext/isomorfeus_ferret_ext/test_lang.c +7 -14
- data/ext/isomorfeus_ferret_ext/test_mdbx_store.c +2 -5
- data/ext/isomorfeus_ferret_ext/test_mempool.c +5 -10
- data/ext/isomorfeus_ferret_ext/test_multimapper.c +3 -6
- data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +9 -18
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +4 -6
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +3 -4
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_q_span.c +19 -35
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +14 -13
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -109
- data/ext/isomorfeus_ferret_ext/test_segments.c +8 -13
- data/ext/isomorfeus_ferret_ext/test_similarity.c +2 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +14 -24
- data/ext/isomorfeus_ferret_ext/test_store.c +96 -115
- data/ext/isomorfeus_ferret_ext/test_term.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -14
- data/ext/isomorfeus_ferret_ext/test_test.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_threading.c +14 -20
- data/ext/isomorfeus_ferret_ext/testhelper.c +11 -21
- data/ext/isomorfeus_ferret_ext/testhelper.h +1 -1
- data/ext/isomorfeus_ferret_ext/tests_all.h +1 -2
- data/lib/isomorfeus/ferret/index/index.rb +1 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +24 -4
@@ -44,13 +44,12 @@ static char *ste_next(FrtTermEnum *te);
|
|
44
44
|
|
45
45
|
/* *** Must be three characters *** */
|
46
46
|
static const char *INDEX_EXTENSIONS[] = {
|
47
|
-
"frq", "prx", "fdx", "fdt", "tfx", "tix", "tis", "del", "gen"
|
47
|
+
"frq", "prx", "fdx", "fdt", "tfx", "tix", "tis", "del", "gen"
|
48
48
|
};
|
49
49
|
|
50
50
|
static const char BASE36_DIGITMAP[] = "0123456789abcdefghijklmnopqrstuvwxyz";
|
51
51
|
|
52
|
-
static char *u64_to_str36(char *buf, int buf_size, frt_u64 u)
|
53
|
-
{
|
52
|
+
static char *u64_to_str36(char *buf, int buf_size, frt_u64 u) {
|
54
53
|
int i = buf_size - 1;
|
55
54
|
buf[i] = '\0';
|
56
55
|
for (i--; i >= 0; i--) {
|
@@ -67,17 +66,14 @@ static char *u64_to_str36(char *buf, int buf_size, frt_u64 u)
|
|
67
66
|
return buf + i;
|
68
67
|
}
|
69
68
|
|
70
|
-
static frt_u64 str36_to_u64(char *p)
|
71
|
-
{
|
69
|
+
static frt_u64 str36_to_u64(char *p) {
|
72
70
|
frt_u64 u = 0;
|
73
71
|
while (true) {
|
74
72
|
if ('0' <= *p && '9' >= *p) {
|
75
73
|
u = u * 36 + *p - '0';
|
76
|
-
}
|
77
|
-
else if ('a' <= *p && 'z' >= *p) {
|
74
|
+
} else if ('a' <= *p && 'z' >= *p) {
|
78
75
|
u = u * 36 + *p - 'a' + 10;
|
79
|
-
}
|
80
|
-
else {
|
76
|
+
} else {
|
81
77
|
break;
|
82
78
|
}
|
83
79
|
p++;
|
@@ -134,12 +130,10 @@ static char *fn_for_gen_field(char *buf,
|
|
134
130
|
const char *base,
|
135
131
|
const char *ext,
|
136
132
|
frt_i64 gen,
|
137
|
-
int field_num)
|
138
|
-
{
|
133
|
+
int field_num) {
|
139
134
|
if (-1 == gen) {
|
140
135
|
return NULL;
|
141
|
-
}
|
142
|
-
else {
|
136
|
+
} else {
|
143
137
|
char b[FRT_SEGMENT_NAME_MAX_LENGTH];
|
144
138
|
sprintf(buf, "%s_%s.%s%d",
|
145
139
|
base,
|
@@ -156,18 +150,15 @@ static char *fn_for_gen_field(char *buf,
|
|
156
150
|
*
|
157
151
|
***************************************************************************/
|
158
152
|
|
159
|
-
static unsigned long co_hash(const void *key)
|
160
|
-
|
161
|
-
return (unsigned long)key;
|
153
|
+
static unsigned long co_hash(const void *key) {
|
154
|
+
return (unsigned long)(uintptr_t)key;
|
162
155
|
}
|
163
156
|
|
164
|
-
static int co_eq(const void *key1, const void *key2)
|
165
|
-
{
|
157
|
+
static int co_eq(const void *key1, const void *key2) {
|
166
158
|
return (key1 == key2);
|
167
159
|
}
|
168
160
|
|
169
|
-
static void co_destroy(FrtCacheObject *self)
|
170
|
-
{
|
161
|
+
static void co_destroy(FrtCacheObject *self) {
|
171
162
|
frt_h_rem(self->ref_tab1, self->ref2, false);
|
172
163
|
frt_h_rem(self->ref_tab2, self->ref1, false);
|
173
164
|
self->destroy(self->obj);
|
@@ -175,8 +166,7 @@ static void co_destroy(FrtCacheObject *self)
|
|
175
166
|
}
|
176
167
|
|
177
168
|
FrtCacheObject *frt_co_create(FrtHash *ref_tab1, FrtHash *ref_tab2,
|
178
|
-
void *ref1, void *ref2, frt_free_ft destroy, void *obj)
|
179
|
-
{
|
169
|
+
void *ref1, void *ref2, frt_free_ft destroy, void *obj) {
|
180
170
|
FrtCacheObject *self = FRT_ALLOC(FrtCacheObject);
|
181
171
|
frt_h_set(ref_tab1, ref2, self);
|
182
172
|
frt_h_set(ref_tab2, ref1, self);
|
@@ -193,302 +183,13 @@ FrtHash *frt_co_hash_create(void) {
|
|
193
183
|
return frt_h_new(&co_hash, &co_eq, (frt_free_ft)NULL, (frt_free_ft)&co_destroy);
|
194
184
|
}
|
195
185
|
|
196
|
-
/****************************************************************************
|
197
|
-
*
|
198
|
-
* FieldInfo
|
199
|
-
*
|
200
|
-
****************************************************************************/
|
201
|
-
|
202
|
-
static void fi_check_params(unsigned int bits) {
|
203
|
-
if (!bits_is_indexed(bits) && bits_store_term_vector(bits)) {
|
204
|
-
FRT_RAISE(FRT_ARG_ERROR, "You can't store the term vectors of an unindexed field.");
|
205
|
-
}
|
206
|
-
if (bits_is_compressed(bits) && !bits_is_stored(bits)) {
|
207
|
-
FRT_RAISE(FRT_ARG_ERROR, "Field must be stored for compression to be useful.");
|
208
|
-
}
|
209
|
-
}
|
210
|
-
|
211
|
-
FrtFieldInfo *frt_fi_alloc(void) {
|
212
|
-
return FRT_ALLOC(FrtFieldInfo);
|
213
|
-
}
|
214
|
-
|
215
|
-
FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, unsigned int bits) {
|
216
|
-
assert(NULL != name);
|
217
|
-
fi_check_params(bits);
|
218
|
-
fi->name = name;
|
219
|
-
fi->boost = 1.0f;
|
220
|
-
fi->bits = bits;
|
221
|
-
fi->number = 0;
|
222
|
-
fi->ref_cnt = 1;
|
223
|
-
fi->rfi = Qnil;
|
224
|
-
return fi;
|
225
|
-
}
|
226
|
-
|
227
|
-
FrtFieldInfo *frt_fi_new(ID name, unsigned int bits) {
|
228
|
-
FrtFieldInfo *fi = frt_fi_alloc();
|
229
|
-
return frt_fi_init(fi, name, bits);
|
230
|
-
}
|
231
|
-
|
232
|
-
void frt_fi_deref(FrtFieldInfo *fi) {
|
233
|
-
if (FRT_DEREF(fi) == 0) free(fi);
|
234
|
-
}
|
235
|
-
|
236
|
-
FrtCompressionType frt_fi_get_compression(FrtFieldInfo *fi) {
|
237
|
-
if (bits_is_compressed(fi->bits)) {
|
238
|
-
if (bits_is_compressed_brotli(fi->bits)) {
|
239
|
-
return FRT_COMPRESSION_BROTLI;
|
240
|
-
} else if (bits_is_compressed_bz2(fi->bits)) {
|
241
|
-
return FRT_COMPRESSION_BZ2;
|
242
|
-
} else if (bits_is_compressed_lz4(fi->bits)) {
|
243
|
-
return FRT_COMPRESSION_LZ4;
|
244
|
-
} else {
|
245
|
-
return FRT_COMPRESSION_BROTLI;
|
246
|
-
}
|
247
|
-
} else {
|
248
|
-
return FRT_COMPRESSION_NONE;
|
249
|
-
}
|
250
|
-
}
|
251
|
-
|
252
|
-
char *frt_fi_to_s(FrtFieldInfo *fi)
|
253
|
-
{
|
254
|
-
const char *fi_name = rb_id2name(fi->name);
|
255
|
-
char *str = FRT_ALLOC_N(char, strlen(fi_name) + 200);
|
256
|
-
char *s = str;
|
257
|
-
s += sprintf(str, "[\"%s\":(%s%s%s%s%s%s%s%s", fi_name,
|
258
|
-
bits_is_stored(fi->bits) ? "is_stored, " : "",
|
259
|
-
bits_is_compressed(fi->bits) ? "is_compressed, " : "",
|
260
|
-
bits_is_indexed(fi->bits) ? "is_indexed, " : "",
|
261
|
-
bits_is_tokenized(fi->bits) ? "is_tokenized, " : "",
|
262
|
-
bits_omit_norms(fi->bits) ? "omit_norms, " : "",
|
263
|
-
bits_store_term_vector(fi->bits) ? "store_term_vector, " : "",
|
264
|
-
bits_store_positions(fi->bits) ? "store_positions, " : "",
|
265
|
-
bits_store_offsets(fi->bits) ? "store_offsets, " : "");
|
266
|
-
s -= 2;
|
267
|
-
if (*s != ',') {
|
268
|
-
s += 2;
|
269
|
-
}
|
270
|
-
|
271
|
-
sprintf(s, ")]");
|
272
|
-
return str;
|
273
|
-
}
|
274
|
-
|
275
|
-
/****************************************************************************
|
276
|
-
*
|
277
|
-
* FieldInfos
|
278
|
-
*
|
279
|
-
****************************************************************************/
|
280
|
-
|
281
|
-
FrtFieldInfos *frt_fis_alloc(void) {
|
282
|
-
return FRT_ALLOC(FrtFieldInfos);
|
283
|
-
}
|
284
|
-
|
285
|
-
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, unsigned int bits) {
|
286
|
-
fi_check_params(bits);
|
287
|
-
fis->field_dict = frt_h_new_ptr((frt_free_ft)&frt_fi_deref);
|
288
|
-
fis->size = 0;
|
289
|
-
fis->capa = FIELD_INFOS_INIT_CAPA;
|
290
|
-
fis->fields = FRT_ALLOC_N(FrtFieldInfo *, fis->capa);
|
291
|
-
fis->bits = bits;
|
292
|
-
fis->ref_cnt = 1;
|
293
|
-
fis->rfis = Qnil;
|
294
|
-
return fis;
|
295
|
-
}
|
296
|
-
|
297
|
-
FrtFieldInfos *frt_fis_new(unsigned int bits) {
|
298
|
-
FrtFieldInfos *fis = frt_fis_alloc();
|
299
|
-
return frt_fis_init(fis, bits);
|
300
|
-
}
|
301
|
-
|
302
|
-
FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi) {
|
303
|
-
if (fis->size == fis->capa) {
|
304
|
-
fis->capa <<= 1;
|
305
|
-
FRT_REALLOC_N(fis->fields, FrtFieldInfo *, fis->capa);
|
306
|
-
}
|
307
|
-
if (!frt_h_set_safe(fis->field_dict, (void *)fi->name, fi)) {
|
308
|
-
FRT_RAISE(FRT_ARG_ERROR, "Field :%s already exists", rb_id2name(fi->name));
|
309
|
-
}
|
310
|
-
FRT_REF(fi);
|
311
|
-
fi->number = fis->size;
|
312
|
-
fis->fields[fis->size] = fi;
|
313
|
-
fis->size++;
|
314
|
-
return fi;
|
315
|
-
}
|
316
|
-
|
317
|
-
FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name) {
|
318
|
-
return (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
|
319
|
-
}
|
320
|
-
|
321
|
-
int frt_fis_get_field_num(FrtFieldInfos *fis, ID name) {
|
322
|
-
FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
|
323
|
-
if (fi) { return fi->number; }
|
324
|
-
else { return -1; }
|
325
|
-
}
|
326
|
-
|
327
|
-
FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name) {
|
328
|
-
FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
|
329
|
-
if (!fi) {
|
330
|
-
fi = (FrtFieldInfo*)frt_fi_new(name, fis->bits);
|
331
|
-
frt_fis_add_field(fis, fi);
|
332
|
-
}
|
333
|
-
return fi;
|
334
|
-
}
|
335
|
-
|
336
|
-
FrtFieldInfos *frt_fis_read(FrtInStream *is)
|
337
|
-
{
|
338
|
-
FrtFieldInfos *volatile fis = NULL;
|
339
|
-
char *field_name;
|
340
|
-
FRT_TRY
|
341
|
-
do {
|
342
|
-
volatile int i;
|
343
|
-
union { frt_u32 i; float f; } tmp;
|
344
|
-
FrtFieldInfo *volatile fi;
|
345
|
-
fis = frt_fis_new(frt_is_read_vint(is));
|
346
|
-
for (i = frt_is_read_vint(is); i > 0; i--) {
|
347
|
-
fi = FRT_ALLOC_AND_ZERO(FrtFieldInfo);
|
348
|
-
FRT_TRY
|
349
|
-
field_name = frt_is_read_string_safe(is);
|
350
|
-
fi->name = rb_intern(field_name);
|
351
|
-
free(field_name);
|
352
|
-
tmp.i = frt_is_read_u32(is);
|
353
|
-
fi->boost = tmp.f;
|
354
|
-
fi->bits = frt_is_read_vint(is);
|
355
|
-
FRT_XCATCHALL
|
356
|
-
free(fi);
|
357
|
-
FRT_XENDTRY
|
358
|
-
frt_fis_add_field(fis, fi);
|
359
|
-
fi->ref_cnt = 1;
|
360
|
-
}
|
361
|
-
} while (0);
|
362
|
-
FRT_XCATCHALL
|
363
|
-
frt_fis_deref(fis);
|
364
|
-
FRT_XENDTRY
|
365
|
-
return fis;
|
366
|
-
}
|
367
|
-
|
368
|
-
void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os)
|
369
|
-
{
|
370
|
-
int i;
|
371
|
-
union { frt_u32 i; float f; } tmp;
|
372
|
-
FrtFieldInfo *fi;
|
373
|
-
const int fis_size = fis->size;
|
374
|
-
|
375
|
-
frt_os_write_vint(os, fis->bits);
|
376
|
-
frt_os_write_vint(os, fis->size);
|
377
|
-
|
378
|
-
for (i = 0; i < fis_size; i++) {
|
379
|
-
fi = fis->fields[i];
|
380
|
-
|
381
|
-
frt_os_write_string(os, rb_id2name(fi->name));
|
382
|
-
tmp.f = fi->boost;
|
383
|
-
frt_os_write_u32(os, tmp.i);
|
384
|
-
frt_os_write_vint(os, fi->bits);
|
385
|
-
}
|
386
|
-
}
|
387
|
-
|
388
|
-
static const char *store_str[] = {
|
389
|
-
":no",
|
390
|
-
":yes",
|
391
|
-
"",
|
392
|
-
":compressed"
|
393
|
-
};
|
394
|
-
|
395
|
-
static const char *fi_store_str(FrtFieldInfo *fi)
|
396
|
-
{
|
397
|
-
return store_str[fi->bits & 0x3];
|
398
|
-
}
|
399
|
-
|
400
|
-
static const char *index_str[] = {
|
401
|
-
":no",
|
402
|
-
":untokenized",
|
403
|
-
"",
|
404
|
-
":yes",
|
405
|
-
"",
|
406
|
-
":untokenized_omit_norms",
|
407
|
-
"",
|
408
|
-
":omit_norms"
|
409
|
-
};
|
410
|
-
|
411
|
-
static const char *fi_index_str(FrtFieldInfo *fi)
|
412
|
-
{
|
413
|
-
return index_str[(fi->bits >> 2) & 0x7];
|
414
|
-
}
|
415
|
-
|
416
|
-
static const char *term_vector_str[] = {
|
417
|
-
":no",
|
418
|
-
":yes",
|
419
|
-
"",
|
420
|
-
":with_positions",
|
421
|
-
"",
|
422
|
-
":with_offsets",
|
423
|
-
"",
|
424
|
-
":with_positions_offsets"
|
425
|
-
};
|
426
|
-
|
427
|
-
static const char *fi_term_vector_str(FrtFieldInfo *fi)
|
428
|
-
{
|
429
|
-
return term_vector_str[(fi->bits >> 5) & 0x7];
|
430
|
-
}
|
431
|
-
|
432
|
-
char *frt_fis_to_s(FrtFieldInfos *fis)
|
433
|
-
{
|
434
|
-
int i, pos, capa = 200 + fis->size * 120;
|
435
|
-
char *buf = FRT_ALLOC_N(char, capa);
|
436
|
-
FrtFieldInfo *fi;
|
437
|
-
const int fis_size = fis->size;
|
438
|
-
|
439
|
-
pos = sprintf(buf,
|
440
|
-
"default:\n"
|
441
|
-
" store: %s\n"
|
442
|
-
" index: %s\n"
|
443
|
-
" term_vector: %s\n"
|
444
|
-
"fields:\n",
|
445
|
-
store_str[fis->bits & 0x3],
|
446
|
-
index_str[(fis->bits >> 2) & 0x7],
|
447
|
-
term_vector_str[(fis->bits >> 5) & 0x7]);
|
448
|
-
for (i = 0; i < fis_size; i++) {
|
449
|
-
fi = fis->fields[i];
|
450
|
-
pos += sprintf(buf + pos,
|
451
|
-
" %s:\n"
|
452
|
-
" boost: %f\n"
|
453
|
-
" store: %s\n"
|
454
|
-
" index: %s\n"
|
455
|
-
" term_vector: %s\n",
|
456
|
-
rb_id2name(fi->name), fi->boost, fi_store_str(fi),
|
457
|
-
fi_index_str(fi), fi_term_vector_str(fi));
|
458
|
-
}
|
459
|
-
|
460
|
-
return buf;
|
461
|
-
}
|
462
|
-
|
463
|
-
void frt_fis_deref(FrtFieldInfos *fis) {
|
464
|
-
if (FRT_DEREF(fis) == 0) {
|
465
|
-
frt_h_destroy(fis->field_dict);
|
466
|
-
free(fis->fields);
|
467
|
-
free(fis);
|
468
|
-
}
|
469
|
-
}
|
470
|
-
|
471
|
-
static bool fis_has_vectors(FrtFieldInfos *fis)
|
472
|
-
{
|
473
|
-
int i;
|
474
|
-
const int fis_size = fis->size;
|
475
|
-
|
476
|
-
for (i = 0; i < fis_size; i++) {
|
477
|
-
if (bits_store_term_vector(fis->fields[i]->bits)) {
|
478
|
-
return true;
|
479
|
-
}
|
480
|
-
}
|
481
|
-
return false;
|
482
|
-
}
|
483
|
-
|
484
186
|
/****************************************************************************
|
485
187
|
*
|
486
188
|
* SegmentInfo
|
487
189
|
*
|
488
190
|
****************************************************************************/
|
489
191
|
|
490
|
-
FrtSegmentInfo *frt_si_new(char *name, int doc_cnt, FrtStore *store)
|
491
|
-
{
|
192
|
+
FrtSegmentInfo *frt_si_new(char *name, int doc_cnt, FrtStore *store) {
|
492
193
|
FrtSegmentInfo *si = FRT_ALLOC(FrtSegmentInfo);
|
493
194
|
si->name = name;
|
494
195
|
si->doc_cnt = doc_cnt;
|
@@ -501,8 +202,7 @@ FrtSegmentInfo *frt_si_new(char *name, int doc_cnt, FrtStore *store)
|
|
501
202
|
return si;
|
502
203
|
}
|
503
204
|
|
504
|
-
static FrtSegmentInfo *si_read(FrtStore *store, FrtInStream *is)
|
505
|
-
{
|
205
|
+
static FrtSegmentInfo *si_read(FrtStore *store, FrtInStream *is) {
|
506
206
|
FrtSegmentInfo *volatile si = FRT_ALLOC_AND_ZERO(FrtSegmentInfo);
|
507
207
|
FRT_TRY
|
508
208
|
si->store = store;
|
@@ -527,8 +227,7 @@ static FrtSegmentInfo *si_read(FrtStore *store, FrtInStream *is)
|
|
527
227
|
return si;
|
528
228
|
}
|
529
229
|
|
530
|
-
static void si_write(FrtSegmentInfo *si, FrtOutStream *os)
|
531
|
-
{
|
230
|
+
static void si_write(FrtSegmentInfo *si, FrtOutStream *os) {
|
532
231
|
frt_os_write_string(os, si->name);
|
533
232
|
frt_os_write_vint(os, si->doc_cnt);
|
534
233
|
frt_os_write_vint(os, si->del_gen);
|
@@ -550,13 +249,11 @@ void frt_si_close(FrtSegmentInfo *si) {
|
|
550
249
|
}
|
551
250
|
}
|
552
251
|
|
553
|
-
bool frt_si_has_deletions(FrtSegmentInfo *si)
|
554
|
-
{
|
252
|
+
bool frt_si_has_deletions(FrtSegmentInfo *si) {
|
555
253
|
return si->del_gen >= 0;
|
556
254
|
}
|
557
255
|
|
558
|
-
void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num)
|
559
|
-
{
|
256
|
+
void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num) {
|
560
257
|
if (field_num >= si->norm_gens_size) {
|
561
258
|
int i;
|
562
259
|
FRT_REALLOC_N(si->norm_gens, int, field_num + 1);
|
@@ -568,8 +265,7 @@ void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num)
|
|
568
265
|
si->norm_gens[field_num]++;
|
569
266
|
}
|
570
267
|
|
571
|
-
static char *si_norm_file_name(FrtSegmentInfo *si, char *buf, int field_num)
|
572
|
-
{
|
268
|
+
static char *si_norm_file_name(FrtSegmentInfo *si, char *buf, int field_num) {
|
573
269
|
int norm_gen;
|
574
270
|
if (field_num >= si->norm_gens_size
|
575
271
|
|| 0 > (norm_gen = si->norm_gens[field_num])) {
|
@@ -588,9 +284,7 @@ void frt_deleter_queue_file(FrtDeleter *dlr, const char *file_name);
|
|
588
284
|
*
|
589
285
|
****************************************************************************/
|
590
286
|
|
591
|
-
|
592
|
-
static char *new_segment(frt_i64 generation)
|
593
|
-
{
|
287
|
+
static char *new_segment(frt_i64 generation) {
|
594
288
|
char buf[FRT_SEGMENT_NAME_MAX_LENGTH];
|
595
289
|
char *fn_p = u64_to_str36(buf, FRT_SEGMENT_NAME_MAX_LENGTH - 1,
|
596
290
|
(frt_u64)generation);
|
@@ -611,8 +305,7 @@ typedef struct FindSegmentsFile {
|
|
611
305
|
} ret;
|
612
306
|
} FindSegmentsFile;
|
613
307
|
|
614
|
-
static void which_gen_i(const char *file_name, void *arg)
|
615
|
-
{
|
308
|
+
static void which_gen_i(const char *file_name, void *arg) {
|
616
309
|
frt_i64 *max_generation = (frt_i64 *)arg;
|
617
310
|
if (0 == strncmp(FRT_SEGMENTS_FILE_NAME"_", file_name,
|
618
311
|
sizeof(FRT_SEGMENTS_FILE_NAME))) {
|
@@ -658,10 +351,9 @@ void frt_sis_put(FrtSegmentInfos *sis, FILE *stream) {
|
|
658
351
|
*
|
659
352
|
* @param store - the Store to look in
|
660
353
|
*/
|
661
|
-
frt_i64 frt_sis_current_segment_generation(FrtStore *store)
|
662
|
-
{
|
354
|
+
frt_i64 frt_sis_current_segment_generation(FrtStore *store) {
|
663
355
|
frt_i64 current_generation = -1;
|
664
|
-
store->each(store, &which_gen_i, &current_generation);
|
356
|
+
store->each(store, segm_idx_name, &which_gen_i, &current_generation);
|
665
357
|
return current_generation;
|
666
358
|
}
|
667
359
|
|
@@ -672,8 +364,7 @@ frt_i64 frt_sis_current_segment_generation(FrtStore *store)
|
|
672
364
|
* @param store - the Store to look in
|
673
365
|
* @return segments_N where N is the current generation
|
674
366
|
*/
|
675
|
-
char *frt_sis_curr_seg_file_name(char *buf, FrtStore *store)
|
676
|
-
{
|
367
|
+
char *frt_sis_curr_seg_file_name(char *buf, FrtStore *store) {
|
677
368
|
return segfn_for_generation(buf, frt_sis_current_segment_generation(store));
|
678
369
|
}
|
679
370
|
|
@@ -686,16 +377,14 @@ char *frt_sis_curr_seg_file_name(char *buf, FrtStore *store)
|
|
686
377
|
*/
|
687
378
|
/*
|
688
379
|
FIXME: not used
|
689
|
-
static char *sis_next_seg_file_name(char *buf, FrtStore *store)
|
690
|
-
{
|
380
|
+
static char *sis_next_seg_file_name(char *buf, FrtStore *store) {
|
691
381
|
return segfn_for_generation(buf, frt_sis_current_segment_generation(store) + 1);
|
692
382
|
}
|
693
383
|
*/
|
694
384
|
|
695
385
|
#define GEN_FILE_RETRY_COUNT 10
|
696
386
|
#define GEN_LOOK_AHEAD_COUNT 10
|
697
|
-
static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void (*run)(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir), FrtIndexReader *ir)
|
698
|
-
{
|
387
|
+
static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void (*run)(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir), FrtIndexReader *ir) {
|
699
388
|
volatile int i;
|
700
389
|
volatile int gen_look_ahead_count = 0;
|
701
390
|
volatile bool retry = false;
|
@@ -732,7 +421,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void
|
|
732
421
|
FrtInStream *gen_is;
|
733
422
|
gen_is = NULL;
|
734
423
|
FRT_TRY
|
735
|
-
gen_is = store->open_input(store, SEGMENTS_GEN_FILE_NAME);
|
424
|
+
gen_is = store->open_input(store, segm_idx_name, SEGMENTS_GEN_FILE_NAME);
|
736
425
|
FRT_XCATCHALL
|
737
426
|
FRT_HANDLED();
|
738
427
|
/* TODO:LOG "segments open: FRT_IO_ERROR"*/
|
@@ -782,7 +471,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void
|
|
782
471
|
* this must be a real error. We throw the original exception
|
783
472
|
* we got. */
|
784
473
|
char *listing, listing_buffer[1024];
|
785
|
-
listing =
|
474
|
+
listing = frt_store_folder_to_s(store, segm_idx_name);
|
786
475
|
strncpy(listing_buffer, listing, 1023);
|
787
476
|
listing_buffer[1023] = '\0';
|
788
477
|
free(listing);
|
@@ -815,7 +504,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void
|
|
815
504
|
* and try it if so: */
|
816
505
|
char prev_seg_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
817
506
|
segfn_for_generation(prev_seg_file_name, gen - 1);
|
818
|
-
if (store->exists(store, prev_seg_file_name)) {
|
507
|
+
if (store->exists(store, segm_idx_name, prev_seg_file_name)) {
|
819
508
|
/* TODO:LOG "fallback to prior segment file '" +
|
820
509
|
* prevSegmentFileName + "'" */
|
821
510
|
FRT_TRY
|
@@ -839,8 +528,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void
|
|
839
528
|
}
|
840
529
|
}
|
841
530
|
|
842
|
-
FrtSegmentInfos *frt_sis_new(FrtFieldInfos *fis)
|
843
|
-
{
|
531
|
+
FrtSegmentInfos *frt_sis_new(FrtFieldInfos *fis) {
|
844
532
|
FrtSegmentInfos *sis = FRT_ALLOC_AND_ZERO(FrtSegmentInfos);
|
845
533
|
FRT_REF(fis);
|
846
534
|
sis->fis = fis;
|
@@ -854,13 +542,11 @@ FrtSegmentInfos *frt_sis_new(FrtFieldInfos *fis)
|
|
854
542
|
return sis;
|
855
543
|
}
|
856
544
|
|
857
|
-
FrtSegmentInfo *frt_sis_new_segment(FrtSegmentInfos *sis, int doc_cnt, FrtStore *store)
|
858
|
-
{
|
545
|
+
FrtSegmentInfo *frt_sis_new_segment(FrtSegmentInfos *sis, int doc_cnt, FrtStore *store) {
|
859
546
|
return frt_sis_add_si(sis, frt_si_new(new_segment(sis->counter++), doc_cnt, store));
|
860
547
|
}
|
861
548
|
|
862
|
-
void frt_sis_destroy(FrtSegmentInfos *sis)
|
863
|
-
{
|
549
|
+
void frt_sis_destroy(FrtSegmentInfos *sis) {
|
864
550
|
int i;
|
865
551
|
const int sis_size = sis->size;
|
866
552
|
for (i = 0; i < sis_size; i++) {
|
@@ -872,8 +558,7 @@ void frt_sis_destroy(FrtSegmentInfos *sis)
|
|
872
558
|
free(sis);
|
873
559
|
}
|
874
560
|
|
875
|
-
FrtSegmentInfo *frt_sis_add_si(FrtSegmentInfos *sis, FrtSegmentInfo *si)
|
876
|
-
{
|
561
|
+
FrtSegmentInfo *frt_sis_add_si(FrtSegmentInfos *sis, FrtSegmentInfo *si) {
|
877
562
|
if (sis->size >= sis->capa) {
|
878
563
|
sis->capa <<= 1;
|
879
564
|
FRT_REALLOC_N(sis->segs, FrtSegmentInfo *, sis->capa);
|
@@ -882,8 +567,7 @@ FrtSegmentInfo *frt_sis_add_si(FrtSegmentInfos *sis, FrtSegmentInfo *si)
|
|
882
567
|
return si;
|
883
568
|
}
|
884
569
|
|
885
|
-
void frt_sis_del_at(FrtSegmentInfos *sis, int at)
|
886
|
-
{
|
570
|
+
void frt_sis_del_at(FrtSegmentInfos *sis, int at) {
|
887
571
|
int i;
|
888
572
|
const int sis_size = --(sis->size);
|
889
573
|
frt_si_close(sis->segs[at]);
|
@@ -892,8 +576,7 @@ void frt_sis_del_at(FrtSegmentInfos *sis, int at)
|
|
892
576
|
}
|
893
577
|
}
|
894
578
|
|
895
|
-
void frt_sis_del_from_to(FrtSegmentInfos *sis, int from, int to)
|
896
|
-
{
|
579
|
+
void frt_sis_del_from_to(FrtSegmentInfos *sis, int from, int to) {
|
897
580
|
int i, num_to_del = to - from;
|
898
581
|
const int sis_size = sis->size -= num_to_del;
|
899
582
|
for (i = from; i < to; i++) {
|
@@ -904,8 +587,7 @@ void frt_sis_del_from_to(FrtSegmentInfos *sis, int from, int to)
|
|
904
587
|
}
|
905
588
|
}
|
906
589
|
|
907
|
-
static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_)
|
908
|
-
{
|
590
|
+
static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_) {
|
909
591
|
int seg_cnt;
|
910
592
|
int i;
|
911
593
|
frt_u32 format = 0;
|
@@ -916,7 +598,7 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReade
|
|
916
598
|
segfn_for_generation(seg_file_name, fsf->generation);
|
917
599
|
fsf->ret.sis = NULL;
|
918
600
|
FRT_TRY
|
919
|
-
is = store->open_input(store, seg_file_name);
|
601
|
+
is = store->open_input(store, segm_idx_name, seg_file_name);
|
920
602
|
sis->store = store;
|
921
603
|
FRT_REF(store);
|
922
604
|
sis->generation = fsf->generation;
|
@@ -942,22 +624,20 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReade
|
|
942
624
|
fsf->ret.sis = sis;
|
943
625
|
}
|
944
626
|
|
945
|
-
FrtSegmentInfos *frt_sis_read(FrtStore *store)
|
946
|
-
{
|
627
|
+
FrtSegmentInfos *frt_sis_read(FrtStore *store) {
|
947
628
|
FindSegmentsFile fsf;
|
948
629
|
sis_find_segments_file(store, &fsf, &frt_sis_read_i, NULL);
|
949
630
|
return fsf.ret.sis;
|
950
631
|
}
|
951
632
|
|
952
|
-
void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter)
|
953
|
-
{
|
633
|
+
void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter) {
|
954
634
|
int i;
|
955
635
|
FrtOutStream *volatile os = NULL;
|
956
636
|
const int sis_size = sis->size;
|
957
637
|
char buf[FRT_SEGMENT_NAME_MAX_LENGTH];
|
958
638
|
sis->generation++;
|
959
639
|
FRT_TRY
|
960
|
-
os = store->new_output(store, segfn_for_generation(buf, sis->generation));
|
640
|
+
os = store->new_output(store, segm_idx_name, segfn_for_generation(buf, sis->generation));
|
961
641
|
frt_os_write_u32(os, FORMAT);
|
962
642
|
frt_os_write_u64(os, ++(sis->version)); /* every write changes the index */
|
963
643
|
frt_os_write_u64(os, sis->counter);
|
@@ -971,7 +651,7 @@ void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter)
|
|
971
651
|
FRT_XENDTRY
|
972
652
|
|
973
653
|
FRT_TRY
|
974
|
-
os = store->new_output(store, SEGMENTS_GEN_FILE_NAME);
|
654
|
+
os = store->new_output(store, segm_idx_name, SEGMENTS_GEN_FILE_NAME);
|
975
655
|
frt_os_write_u64(os, sis->generation);
|
976
656
|
frt_os_write_u64(os, sis->generation);
|
977
657
|
FRT_XFINALLY
|
@@ -985,15 +665,14 @@ void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter)
|
|
985
665
|
}
|
986
666
|
}
|
987
667
|
|
988
|
-
static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_)
|
989
|
-
{
|
668
|
+
static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_) {
|
990
669
|
FrtInStream *is;
|
991
670
|
frt_u32 format = 0;
|
992
671
|
frt_u64 version = 0;
|
993
672
|
char seg_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
994
673
|
|
995
674
|
segfn_for_generation(seg_file_name, (frt_u64)fsf->generation);
|
996
|
-
is = store->open_input(store, seg_file_name);
|
675
|
+
is = store->open_input(store, segm_idx_name, seg_file_name);
|
997
676
|
|
998
677
|
FRT_TRY
|
999
678
|
format = frt_is_read_u32(is); // format
|
@@ -1006,8 +685,7 @@ static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexR
|
|
1006
685
|
fsf->ret.uint64 = version;
|
1007
686
|
}
|
1008
687
|
|
1009
|
-
frt_u64 frt_sis_read_current_version(FrtStore *store)
|
1010
|
-
{
|
688
|
+
frt_u64 frt_sis_read_current_version(FrtStore *store) {
|
1011
689
|
FindSegmentsFile fsf;
|
1012
690
|
sis_find_segments_file(store, &fsf, &frt_sis_read_ver_i, NULL);
|
1013
691
|
return fsf.ret.uint64;
|
@@ -1029,9 +707,9 @@ FrtFieldsReader *frt_fr_open(FrtStore *store, const char *segment, FrtFieldInfos
|
|
1029
707
|
fr->fis = fis;
|
1030
708
|
|
1031
709
|
strcpy(file_name + segment_len, ".fdt");
|
1032
|
-
fr->fdt_in = store->open_input(store, file_name);
|
710
|
+
fr->fdt_in = store->open_input(store, segm_idx_name, file_name);
|
1033
711
|
strcpy(file_name + segment_len, ".fdx");
|
1034
|
-
fr->fdx_in = store->open_input(store, file_name);
|
712
|
+
fr->fdx_in = store->open_input(store, segm_idx_name, file_name);
|
1035
713
|
fr->size = frt_is_length(fr->fdx_in) / FIELDS_IDX_PTR_SIZE;
|
1036
714
|
fr->store = store;
|
1037
715
|
FRT_REF(store);
|
@@ -1057,32 +735,30 @@ void frt_fr_close(FrtFieldsReader *fr) {
|
|
1057
735
|
free(fr);
|
1058
736
|
}
|
1059
737
|
|
1060
|
-
static FrtDocField *frt_fr_df_new(ID name, int size, FrtCompressionType
|
738
|
+
static FrtDocField *frt_fr_df_new(ID name, int size, FrtCompressionType compression_type) {
|
1061
739
|
FrtDocField *df = FRT_ALLOC(FrtDocField);
|
1062
740
|
df->name = name;
|
1063
741
|
df->capa = df->size = size;
|
1064
|
-
df->data = FRT_ALLOC_N(char *, df->capa);
|
742
|
+
df->data = FRT_ALLOC_N(const char *, df->capa);
|
1065
743
|
df->lengths = FRT_ALLOC_N(int, df->capa);
|
1066
744
|
df->encodings = FRT_ALLOC_N(rb_encoding *, df->capa);
|
1067
|
-
df->destroy_data = true;
|
1068
745
|
df->boost = 1.0f;
|
1069
|
-
df->
|
746
|
+
df->compression_type = compression_type;
|
1070
747
|
return df;
|
1071
748
|
}
|
1072
749
|
|
1073
|
-
static void frt_fr_read_compressed_fields(FrtFieldsReader *fr, FrtDocField *df, FrtCompressionType
|
750
|
+
static void frt_fr_read_compressed_fields(FrtFieldsReader *fr, FrtDocField *df, FrtCompressionType compression_type) {
|
1074
751
|
int i;
|
1075
752
|
const int df_size = df->size;
|
1076
753
|
FrtInStream *fdt_in = fr->fdt_in;
|
1077
754
|
|
1078
755
|
for (i = 0; i < df_size; i++) {
|
1079
|
-
const int compressed_len = df->lengths[i]
|
1080
|
-
df->data[i] = frt_is_read_compressed_bytes(fdt_in, compressed_len, &(df->lengths[i]),
|
756
|
+
const int compressed_len = df->lengths[i];
|
757
|
+
df->data[i] = frt_is_read_compressed_bytes(fdt_in, compressed_len, &(df->lengths[i]), compression_type);
|
1081
758
|
}
|
1082
759
|
}
|
1083
760
|
|
1084
|
-
FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
|
1085
|
-
{
|
761
|
+
FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num) {
|
1086
762
|
int i, j;
|
1087
763
|
frt_off_t pos;
|
1088
764
|
int stored_cnt;
|
@@ -1098,28 +774,29 @@ FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
|
|
1098
774
|
for (i = 0; i < stored_cnt; i++) {
|
1099
775
|
const int field_num = frt_is_read_vint(fdt_in);
|
1100
776
|
FrtFieldInfo *fi = fr->fis->fields[field_num];
|
1101
|
-
const int
|
1102
|
-
FrtDocField *df = frt_fr_df_new(fi->name,
|
777
|
+
const int df_field_count = frt_is_read_vint(fdt_in);
|
778
|
+
FrtDocField *df = frt_fr_df_new(fi->name, df_field_count, bits_get_compression_type(fi->bits));
|
1103
779
|
|
1104
|
-
for (j = 0; j <
|
780
|
+
for (j = 0; j < df_field_count; j++) {
|
1105
781
|
df->lengths[j] = frt_is_read_vint(fdt_in);
|
1106
782
|
df->encodings[j] = rb_enc_from_index(frt_is_read_vint(fdt_in));
|
1107
|
-
df->
|
783
|
+
df->compression_type = frt_is_read_vint(fdt_in);
|
1108
784
|
}
|
1109
785
|
|
1110
786
|
frt_doc_add_field(doc, df);
|
1111
787
|
}
|
1112
788
|
for (i = 0; i < stored_cnt; i++) {
|
1113
789
|
FrtDocField *df = doc->fields[i];
|
1114
|
-
if (df->
|
1115
|
-
frt_fr_read_compressed_fields(fr, df, df->
|
790
|
+
if (df->compression_type != FRT_COMPRESSION_NONE) {
|
791
|
+
frt_fr_read_compressed_fields(fr, df, df->compression_type);
|
1116
792
|
} else {
|
1117
793
|
const int df_size = df->size;
|
1118
794
|
for (j = 0; j < df_size; j++) {
|
1119
|
-
const int read_len = df->lengths[j]
|
1120
|
-
|
1121
|
-
frt_is_read_bytes(fdt_in, (frt_uchar *)
|
1122
|
-
|
795
|
+
const int read_len = df->lengths[j];
|
796
|
+
char *d = FRT_ALLOC_N(char, read_len + 1);
|
797
|
+
frt_is_read_bytes(fdt_in, (frt_uchar *)d, read_len);
|
798
|
+
d[read_len] = '\0';
|
799
|
+
df->data[j] = d;
|
1123
800
|
}
|
1124
801
|
}
|
1125
802
|
}
|
@@ -1127,8 +804,7 @@ FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
|
|
1127
804
|
return doc;
|
1128
805
|
}
|
1129
806
|
|
1130
|
-
FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num)
|
1131
|
-
{
|
807
|
+
FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num) {
|
1132
808
|
int start = 0;
|
1133
809
|
int i, j;
|
1134
810
|
frt_off_t pos;
|
@@ -1146,18 +822,18 @@ FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num)
|
|
1146
822
|
for (i = 0; i < stored_cnt; i++) {
|
1147
823
|
FrtFieldInfo *fi = fr->fis->fields[frt_is_read_vint(fdt_in)];
|
1148
824
|
const int df_size = frt_is_read_vint(fdt_in);
|
1149
|
-
FrtLazyDocField *lazy_df = frt_lazy_df_new(fi->name, df_size,
|
825
|
+
FrtLazyDocField *lazy_df = frt_lazy_df_new(fi->name, df_size, bits_get_compression_type(fi->bits));
|
1150
826
|
const int field_start = start;
|
1151
827
|
/* get the starts relative positions this time around */
|
1152
828
|
|
1153
829
|
for (j = 0; j < df_size; j++) {
|
1154
830
|
lazy_df->data[j].start = start;
|
1155
|
-
start +=
|
831
|
+
start += (lazy_df->data[j].length = frt_is_read_vint(fdt_in));
|
1156
832
|
lazy_df->data[j].encoding = rb_enc_from_index(frt_is_read_vint(fdt_in));
|
1157
|
-
lazy_df->data[j].
|
833
|
+
lazy_df->data[j].compression_type = frt_is_read_vint(fdt_in);
|
1158
834
|
}
|
1159
835
|
|
1160
|
-
lazy_df->len = start - field_start
|
836
|
+
lazy_df->len = start - field_start;
|
1161
837
|
frt_lazy_doc_add_field(lazy_doc, lazy_df, i);
|
1162
838
|
}
|
1163
839
|
/* correct the starts to their correct absolute positions */
|
@@ -1173,8 +849,7 @@ FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num)
|
|
1173
849
|
return lazy_doc;
|
1174
850
|
}
|
1175
851
|
|
1176
|
-
static FrtTermVector *frt_fr_read_term_vector(FrtFieldsReader *fr, int field_num)
|
1177
|
-
{
|
852
|
+
static FrtTermVector *frt_fr_read_term_vector(FrtFieldsReader *fr, int field_num) {
|
1178
853
|
FrtTermVector *tv = FRT_ALLOC_AND_ZERO(FrtTermVector);
|
1179
854
|
FrtInStream *fdt_in = fr->fdt_in;
|
1180
855
|
FrtFieldInfo *fi = fr->fis->fields[field_num];
|
@@ -1232,8 +907,7 @@ static FrtTermVector *frt_fr_read_term_vector(FrtFieldsReader *fr, int field_num
|
|
1232
907
|
return tv;
|
1233
908
|
}
|
1234
909
|
|
1235
|
-
FrtHash *frt_fr_get_tv(FrtFieldsReader *fr, int doc_num)
|
1236
|
-
{
|
910
|
+
FrtHash *frt_fr_get_tv(FrtFieldsReader *fr, int doc_num) {
|
1237
911
|
FrtHash *term_vectors = frt_h_new_ptr((frt_free_ft)&frt_tv_destroy);
|
1238
912
|
int i;
|
1239
913
|
FrtInStream *fdx_in = fr->fdx_in;
|
@@ -1319,10 +993,10 @@ FrtFieldsWriter *frt_fw_open(FrtStore *store, const char *segment, FrtFieldInfos
|
|
1319
993
|
memcpy(file_name, segment, segment_len);
|
1320
994
|
|
1321
995
|
strcpy(file_name + segment_len, ".fdt");
|
1322
|
-
fw->fdt_out = store->new_output(store, file_name);
|
996
|
+
fw->fdt_out = store->new_output(store, segm_idx_name, file_name);
|
1323
997
|
|
1324
998
|
strcpy(file_name + segment_len, ".fdx");
|
1325
|
-
fw->fdx_out = store->new_output(store, file_name);
|
999
|
+
fw->fdx_out = store->new_output(store, segm_idx_name, file_name);
|
1326
1000
|
|
1327
1001
|
fw->buffer = frt_ram_new_buffer();
|
1328
1002
|
|
@@ -1344,9 +1018,9 @@ void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc) {
|
|
1344
1018
|
int i, j, stored_cnt = 0;
|
1345
1019
|
FrtDocField *df;
|
1346
1020
|
FrtFieldInfo *fi;
|
1347
|
-
FrtCompressionType
|
1021
|
+
FrtCompressionType compression_type;
|
1348
1022
|
FrtOutStream *fdt_out = fw->fdt_out, *fdx_out = fw->fdx_out;
|
1349
|
-
const int doc_size = doc->
|
1023
|
+
const int doc_size = doc->field_count;
|
1350
1024
|
|
1351
1025
|
for (i = 0; i < doc_size; i++) {
|
1352
1026
|
df = doc->fields[i];
|
@@ -1370,23 +1044,20 @@ void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc) {
|
|
1370
1044
|
frt_os_write_vint(fdt_out, df_size);
|
1371
1045
|
|
1372
1046
|
if (bits_is_compressed(fi->bits)) {
|
1373
|
-
|
1047
|
+
compression_type = bits_get_compression_type(fi->bits);
|
1374
1048
|
for (j = 0; j < df_size; j++) {
|
1375
|
-
|
1376
|
-
|
1377
|
-
frt_os_write_vint(fdt_out, compressed_len - 1);
|
1049
|
+
int compressed_len = frt_os_write_compressed_bytes(fw->buffer, (frt_uchar*)df->data[j], df->lengths[j], compression_type);
|
1050
|
+
frt_os_write_vint(fdt_out, compressed_len);
|
1378
1051
|
frt_os_write_vint(fdt_out, rb_enc_to_index(df->encodings[j]));
|
1379
|
-
frt_os_write_vint(fdt_out,
|
1052
|
+
frt_os_write_vint(fdt_out, compression_type);
|
1380
1053
|
}
|
1381
1054
|
} else {
|
1382
1055
|
for (j = 0; j < df_size; j++) {
|
1383
1056
|
const int length = df->lengths[j];
|
1057
|
+
frt_os_write_bytes(fw->buffer, (frt_uchar*)df->data[j], length);
|
1384
1058
|
frt_os_write_vint(fdt_out, length);
|
1385
1059
|
frt_os_write_vint(fdt_out, rb_enc_to_index(df->encodings[j]));
|
1386
|
-
frt_os_write_vint(fdt_out, FRT_COMPRESSION_NONE);
|
1387
|
-
frt_os_write_bytes(fw->buffer, (frt_uchar*)df->data[j], length);
|
1388
|
-
/* leave a space between fields as that is how they are analyzed */
|
1389
|
-
frt_os_write_byte(fw->buffer, ' ');
|
1060
|
+
frt_os_write_vint(fdt_out, FRT_COMPRESSION_NONE);
|
1390
1061
|
}
|
1391
1062
|
}
|
1392
1063
|
}
|
@@ -1413,8 +1084,7 @@ void frt_fw_add_postings(FrtFieldsWriter *fw,
|
|
1413
1084
|
FrtPostingList **plists,
|
1414
1085
|
int posting_count,
|
1415
1086
|
FrtOffset *offsets,
|
1416
|
-
int offset_count)
|
1417
|
-
{
|
1087
|
+
int offset_count) {
|
1418
1088
|
int i, delta_start, delta_length;
|
1419
1089
|
const char *last_term = FRT_EMPTY_STRING;
|
1420
1090
|
FrtOutStream *fdt_out = fw->fdt_out;
|
@@ -1478,19 +1148,16 @@ void frt_fw_add_postings(FrtFieldsWriter *fw,
|
|
1478
1148
|
|
1479
1149
|
#define TE(ste) ((FrtTermEnum *)ste)
|
1480
1150
|
|
1481
|
-
char *frt_te_get_term(FrtTermEnum *te)
|
1482
|
-
{
|
1151
|
+
char *frt_te_get_term(FrtTermEnum *te) {
|
1483
1152
|
return (char *)memcpy(FRT_ALLOC_N(char, te->curr_term_len + 1),
|
1484
1153
|
te->curr_term, te->curr_term_len + 1);
|
1485
1154
|
}
|
1486
1155
|
|
1487
|
-
FrtTermInfo *frt_te_get_ti(FrtTermEnum *te)
|
1488
|
-
{
|
1156
|
+
FrtTermInfo *frt_te_get_ti(FrtTermEnum *te) {
|
1489
1157
|
return (FrtTermInfo*)memcpy(FRT_ALLOC(FrtTermInfo), &(te->curr_ti), sizeof(FrtTermInfo));
|
1490
1158
|
}
|
1491
1159
|
|
1492
|
-
static char *te_skip_to(FrtTermEnum *te, const char *term)
|
1493
|
-
{
|
1160
|
+
static char *te_skip_to(FrtTermEnum *te, const char *term) {
|
1494
1161
|
char *curr_term = te->curr_term;
|
1495
1162
|
if (strcmp(curr_term, term) < 0) {
|
1496
1163
|
while (NULL != ((curr_term = te->next(te)))
|
@@ -1512,8 +1179,7 @@ static char *te_skip_to(FrtTermEnum *te, const char *term)
|
|
1512
1179
|
* SegmentTermIndex
|
1513
1180
|
****************************************************************************/
|
1514
1181
|
|
1515
|
-
static void sti_destroy(FrtSegmentTermIndex *sti)
|
1516
|
-
{
|
1182
|
+
static void sti_destroy(FrtSegmentTermIndex *sti) {
|
1517
1183
|
if (sti->index_terms) {
|
1518
1184
|
int i;
|
1519
1185
|
const int sti_index_cnt = sti->index_cnt;
|
@@ -1557,8 +1223,7 @@ static void sti_ensure_index_is_read(FrtSegmentTermIndex *sti, FrtTermEnum *inde
|
|
1557
1223
|
}
|
1558
1224
|
}
|
1559
1225
|
|
1560
|
-
static int sti_get_index_offset(FrtSegmentTermIndex *sti, const char *term)
|
1561
|
-
{
|
1226
|
+
static int sti_get_index_offset(FrtSegmentTermIndex *sti, const char *term) {
|
1562
1227
|
int lo = 0;
|
1563
1228
|
int hi = sti->index_cnt - 1;
|
1564
1229
|
int mid, delta;
|
@@ -1569,11 +1234,9 @@ static int sti_get_index_offset(FrtSegmentTermIndex *sti, const char *term)
|
|
1569
1234
|
delta = strcmp(term, index_terms[mid]);
|
1570
1235
|
if (delta < 0) {
|
1571
1236
|
hi = mid - 1;
|
1572
|
-
}
|
1573
|
-
else if (delta > 0) {
|
1237
|
+
} else if (delta > 0) {
|
1574
1238
|
lo = mid + 1;
|
1575
|
-
}
|
1576
|
-
else {
|
1239
|
+
} else {
|
1577
1240
|
return mid;
|
1578
1241
|
}
|
1579
1242
|
}
|
@@ -1601,7 +1264,7 @@ FrtSegmentFieldIndex *frt_sfi_open(FrtStore *store, const char *segment) {
|
|
1601
1264
|
pthread_mutex_init(&sfi->mutex, NULL);
|
1602
1265
|
|
1603
1266
|
sprintf(file_name, "%s.tfx", segment);
|
1604
|
-
is = store->open_input(store, file_name);
|
1267
|
+
is = store->open_input(store, segm_idx_name, file_name);
|
1605
1268
|
field_count = (int)frt_is_read_u32(is);
|
1606
1269
|
sfi->index_interval = frt_is_read_vint(is);
|
1607
1270
|
sfi->skip_interval = frt_is_read_vint(is);
|
@@ -1620,7 +1283,7 @@ FrtSegmentFieldIndex *frt_sfi_open(FrtStore *store, const char *segment) {
|
|
1620
1283
|
frt_is_close(is);
|
1621
1284
|
|
1622
1285
|
sprintf(file_name, "%s.tix", segment);
|
1623
|
-
is = store->open_input(store, file_name);
|
1286
|
+
is = store->open_input(store, segm_idx_name, file_name);
|
1624
1287
|
FRT_DEREF(is);
|
1625
1288
|
sfi->index_te = frt_ste_new(is, sfi);
|
1626
1289
|
return sfi;
|
@@ -1750,13 +1413,11 @@ void frt_ste_close(FrtTermEnum *te) {
|
|
1750
1413
|
free(te);
|
1751
1414
|
}
|
1752
1415
|
|
1753
|
-
static char *frt_ste_get_term(FrtTermEnum *te, int pos)
|
1754
|
-
{
|
1416
|
+
static char *frt_ste_get_term(FrtTermEnum *te, int pos) {
|
1755
1417
|
FrtSegmentTermEnum *ste = STE(te);
|
1756
1418
|
if (pos >= ste->size) {
|
1757
1419
|
return NULL;
|
1758
|
-
}
|
1759
|
-
else if (pos != ste->pos) {
|
1420
|
+
} else if (pos != ste->pos) {
|
1760
1421
|
int idx_int = ste->sfi->index_interval;
|
1761
1422
|
if ((pos < ste->pos) || pos > (1 + ste->pos / idx_int) * idx_int) {
|
1762
1423
|
FrtSegmentTermIndex *sti = (FrtSegmentTermIndex *)frt_h_get_int(ste->sfi->field_dict, te->field_num);
|
@@ -1773,8 +1434,7 @@ static char *frt_ste_get_term(FrtTermEnum *te, int pos)
|
|
1773
1434
|
return te->curr_term;
|
1774
1435
|
}
|
1775
1436
|
|
1776
|
-
FrtTermEnum *frt_ste_new(FrtInStream *is, FrtSegmentFieldIndex *sfi)
|
1777
|
-
{
|
1437
|
+
FrtTermEnum *frt_ste_new(FrtInStream *is, FrtSegmentFieldIndex *sfi) {
|
1778
1438
|
FrtSegmentTermEnum *ste = ste_allocate();
|
1779
1439
|
|
1780
1440
|
TE(ste)->field_num = -1;
|
@@ -1794,8 +1454,7 @@ FrtTermEnum *frt_ste_new(FrtInStream *is, FrtSegmentFieldIndex *sfi)
|
|
1794
1454
|
|
1795
1455
|
#define MTE(te) ((MultiTermEnum *)(te))
|
1796
1456
|
|
1797
|
-
typedef struct TermEnumWrapper
|
1798
|
-
{
|
1457
|
+
typedef struct TermEnumWrapper {
|
1799
1458
|
int index;
|
1800
1459
|
FrtTermEnum *te;
|
1801
1460
|
int *doc_map;
|
@@ -1803,8 +1462,7 @@ typedef struct TermEnumWrapper
|
|
1803
1462
|
char *term;
|
1804
1463
|
} TermEnumWrapper;
|
1805
1464
|
|
1806
|
-
typedef struct MultiTermEnum
|
1807
|
-
{
|
1465
|
+
typedef struct MultiTermEnum {
|
1808
1466
|
FrtTermEnum te;
|
1809
1467
|
int doc_freq;
|
1810
1468
|
FrtPriorityQueue *tew_queue;
|
@@ -1816,29 +1474,24 @@ typedef struct MultiTermEnum
|
|
1816
1474
|
int *ti_indexes;
|
1817
1475
|
} MultiTermEnum;
|
1818
1476
|
|
1819
|
-
static bool tew_lt(const TermEnumWrapper *tew1, const TermEnumWrapper *tew2)
|
1820
|
-
{
|
1477
|
+
static bool tew_lt(const TermEnumWrapper *tew1, const TermEnumWrapper *tew2) {
|
1821
1478
|
int cmpres = strcmp(tew1->term, tew2->term);
|
1822
1479
|
if (0 == cmpres) {
|
1823
1480
|
return tew1->index < tew2->index;
|
1824
|
-
}
|
1825
|
-
else {
|
1481
|
+
} else {
|
1826
1482
|
return cmpres < 0;
|
1827
1483
|
}
|
1828
1484
|
}
|
1829
1485
|
|
1830
|
-
static char *tew_next(TermEnumWrapper *tew)
|
1831
|
-
{
|
1486
|
+
static char *tew_next(TermEnumWrapper *tew) {
|
1832
1487
|
return (tew->term = tew->te->next(tew->te));
|
1833
1488
|
}
|
1834
1489
|
|
1835
|
-
static char *tew_skip_to(TermEnumWrapper *tew, const char *term)
|
1836
|
-
{
|
1490
|
+
static char *tew_skip_to(TermEnumWrapper *tew, const char *term) {
|
1837
1491
|
return (tew->term = tew->te->skip_to(tew->te, term));
|
1838
1492
|
}
|
1839
1493
|
|
1840
|
-
static void tew_destroy(TermEnumWrapper *tew)
|
1841
|
-
{
|
1494
|
+
static void tew_destroy(TermEnumWrapper *tew) {
|
1842
1495
|
frt_ir_close(tew->ir);
|
1843
1496
|
if (tew->doc_map) {
|
1844
1497
|
free(tew->doc_map);
|
@@ -1905,8 +1558,7 @@ static FrtTermEnum *mte_set_field(FrtTermEnum *te, int field_num) {
|
|
1905
1558
|
if (tew_next(tew)) {
|
1906
1559
|
frt_pq_push(mte->tew_queue, tew); /* initialize queue */
|
1907
1560
|
}
|
1908
|
-
}
|
1909
|
-
else {
|
1561
|
+
} else {
|
1910
1562
|
sub_te->field_num = -1;
|
1911
1563
|
}
|
1912
1564
|
|
@@ -2009,7 +1661,7 @@ FrtTermInfosReader *frt_tir_open(FrtStore *store, FrtSegmentFieldIndex *sfi, con
|
|
2009
1661
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
2010
1662
|
|
2011
1663
|
sprintf(file_name, "%s.tis", segment);
|
2012
|
-
FrtInStream *is = store->open_input(store, file_name);
|
1664
|
+
FrtInStream *is = store->open_input(store, segm_idx_name, file_name);
|
2013
1665
|
FRT_DEREF(is);
|
2014
1666
|
tir->orig_te = frt_ste_new(is, sfi);
|
2015
1667
|
tir->thread_te = 0;
|
@@ -2088,7 +1740,7 @@ void frt_tir_close(FrtTermInfosReader *tir) {
|
|
2088
1740
|
|
2089
1741
|
static FrtTermWriter *frt_tw_new(FrtStore *store, char *file_name) {
|
2090
1742
|
FrtTermWriter *tw = FRT_ALLOC_AND_ZERO(FrtTermWriter);
|
2091
|
-
tw->os = store->new_output(store, file_name);
|
1743
|
+
tw->os = store->new_output(store, segm_idx_name, file_name);
|
2092
1744
|
tw->last_term = FRT_EMPTY_STRING;
|
2093
1745
|
return tw;
|
2094
1746
|
}
|
@@ -2115,7 +1767,7 @@ FrtTermInfosWriter *frt_tiw_open(FrtStore *store, const char *segment, int index
|
|
2115
1767
|
strcpy(file_name + segment_len, ".tis");
|
2116
1768
|
tiw->tis_writer = frt_tw_new(store, file_name);
|
2117
1769
|
strcpy(file_name + segment_len, ".tfx");
|
2118
|
-
tiw->tfx_out = store->new_output(store, file_name);
|
1770
|
+
tiw->tfx_out = store->new_output(store, segm_idx_name, file_name);
|
2119
1771
|
frt_os_write_u32(tiw->tfx_out, 0); /* make space for field_count */
|
2120
1772
|
|
2121
1773
|
/* The following two numbers are the first numbers written to the field
|
@@ -2483,35 +2135,30 @@ static bool stpe_next(FrtTermDocEnum *tde) {
|
|
2483
2135
|
}
|
2484
2136
|
}
|
2485
2137
|
|
2486
|
-
static int stpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
|
2487
|
-
{
|
2138
|
+
static int stpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num) {
|
2488
2139
|
(void)tde; (void)docs; (void)freqs; (void)req_num;
|
2489
2140
|
FRT_RAISE(FRT_ARG_ERROR, "TermPosEnum does not handle processing multiple documents"
|
2490
2141
|
" in one call. Use TermDocEnum instead.");
|
2491
2142
|
return -1;
|
2492
2143
|
}
|
2493
2144
|
|
2494
|
-
static int stpe_next_position(FrtTermDocEnum *tde)
|
2495
|
-
{
|
2145
|
+
static int stpe_next_position(FrtTermDocEnum *tde) {
|
2496
2146
|
FrtSegmentTermDocEnum *stde = STDE(tde);
|
2497
2147
|
return (stde->prx_cnt-- > 0) ? stde->position += frt_is_read_vint(stde->prx_in)
|
2498
2148
|
: -1;
|
2499
2149
|
}
|
2500
2150
|
|
2501
|
-
static void stpe_close(FrtTermDocEnum *tde)
|
2502
|
-
{
|
2151
|
+
static void stpe_close(FrtTermDocEnum *tde) {
|
2503
2152
|
frt_is_close(STDE(tde)->prx_in);
|
2504
2153
|
STDE(tde)->prx_in = NULL;
|
2505
2154
|
stde_close(tde);
|
2506
2155
|
}
|
2507
2156
|
|
2508
|
-
static void stpe_skip_prox(FrtSegmentTermDocEnum *stde)
|
2509
|
-
{
|
2157
|
+
static void stpe_skip_prox(FrtSegmentTermDocEnum *stde) {
|
2510
2158
|
frt_is_skip_vints(stde->prx_in, stde->freq);
|
2511
2159
|
}
|
2512
2160
|
|
2513
|
-
static void stpe_seek_prox(FrtSegmentTermDocEnum *stde, frt_off_t prx_ptr)
|
2514
|
-
{
|
2161
|
+
static void stpe_seek_prox(FrtSegmentTermDocEnum *stde, frt_off_t prx_ptr) {
|
2515
2162
|
frt_is_seek(stde->prx_in, prx_ptr);
|
2516
2163
|
stde->prx_cnt = 0;
|
2517
2164
|
}
|
@@ -2520,8 +2167,7 @@ FrtTermDocEnum *frt_stpe_new(FrtTermInfosReader *tir,
|
|
2520
2167
|
FrtInStream *frq_in,
|
2521
2168
|
FrtInStream *prx_in,
|
2522
2169
|
FrtBitVector *del_docs,
|
2523
|
-
int skip_interval)
|
2524
|
-
{
|
2170
|
+
int skip_interval) {
|
2525
2171
|
FrtTermDocEnum *tde = frt_stde_new(tir, frq_in, del_docs, skip_interval);
|
2526
2172
|
FrtSegmentTermDocEnum *stde = STDE(tde);
|
2527
2173
|
|
@@ -2550,8 +2196,7 @@ FrtTermDocEnum *frt_stpe_new(FrtTermInfosReader *tir,
|
|
2550
2196
|
|
2551
2197
|
#define MTDE(tde) ((MultiTermDocEnum *)(tde))
|
2552
2198
|
|
2553
|
-
typedef struct MultiTermDocEnum
|
2554
|
-
{
|
2199
|
+
typedef struct MultiTermDocEnum {
|
2555
2200
|
FrtTermDocEnum tde;
|
2556
2201
|
int *starts;
|
2557
2202
|
int base;
|
@@ -2564,16 +2209,14 @@ typedef struct MultiTermDocEnum
|
|
2564
2209
|
FrtTermDocEnum *curr_tde;
|
2565
2210
|
} MultiTermDocEnum;
|
2566
2211
|
|
2567
|
-
static FrtTermDocEnum *mtde_next_tde(MultiTermDocEnum *mtde)
|
2568
|
-
{
|
2212
|
+
static FrtTermDocEnum *mtde_next_tde(MultiTermDocEnum *mtde) {
|
2569
2213
|
mtde->ptr++;
|
2570
2214
|
while (mtde->ptr < mtde->ir_cnt && !mtde->state[mtde->ptr]) {
|
2571
2215
|
mtde->ptr++;
|
2572
2216
|
}
|
2573
2217
|
if (mtde->ptr >= mtde->ir_cnt) {
|
2574
2218
|
return mtde->curr_tde = NULL;
|
2575
|
-
}
|
2576
|
-
else {
|
2219
|
+
} else {
|
2577
2220
|
FrtTermDocEnum *tde = mtde->irs_tde[mtde->ptr];
|
2578
2221
|
mtde->base = mtde->starts[mtde->ptr];
|
2579
2222
|
return mtde->curr_tde = tde;
|
@@ -2587,8 +2230,7 @@ static FrtTermDocEnum *mtde_next_tde(MultiTermDocEnum *mtde)
|
|
2587
2230
|
}\
|
2588
2231
|
} while (0)
|
2589
2232
|
|
2590
|
-
static void mtde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te)
|
2591
|
-
{
|
2233
|
+
static void mtde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te) {
|
2592
2234
|
int i;
|
2593
2235
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2594
2236
|
memset(mtde->state, 0, mtde->ir_cnt);
|
@@ -2598,11 +2240,9 @@ static void mtde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te)
|
|
2598
2240
|
mtde->state[index] = 1;
|
2599
2241
|
if (tde->close == stde_close) {
|
2600
2242
|
stde_seek_ti(STDE(tde), MTE(te)->tis + i);
|
2601
|
-
}
|
2602
|
-
else if (tde->close == stpe_close) {
|
2243
|
+
} else if (tde->close == stpe_close) {
|
2603
2244
|
stpe_seek_ti(STDE(tde), MTE(te)->tis + i);
|
2604
|
-
}
|
2605
|
-
else {
|
2245
|
+
} else {
|
2606
2246
|
tde->seek(tde, MTE(te)->tews[index].te->field_num, te->curr_term);
|
2607
2247
|
}
|
2608
2248
|
}
|
@@ -2611,48 +2251,40 @@ static void mtde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te)
|
|
2611
2251
|
mtde_next_tde(mtde);
|
2612
2252
|
}
|
2613
2253
|
|
2614
|
-
static void mtde_seek(FrtTermDocEnum *tde, int field_num, const char *term)
|
2615
|
-
{
|
2254
|
+
static void mtde_seek(FrtTermDocEnum *tde, int field_num, const char *term) {
|
2616
2255
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2617
2256
|
FrtTermEnum *te = mtde->te;
|
2618
2257
|
char *t;
|
2619
2258
|
te->set_field(te, field_num);
|
2620
2259
|
if (NULL != (t = te->skip_to(te, term)) && 0 == strcmp(term, t)) {
|
2621
2260
|
mtde_seek_te(tde, te);
|
2622
|
-
}
|
2623
|
-
else {
|
2261
|
+
} else {
|
2624
2262
|
memset(mtde->state, 0, mtde->ir_cnt);
|
2625
2263
|
}
|
2626
2264
|
}
|
2627
2265
|
|
2628
|
-
static int mtde_doc_num(FrtTermDocEnum *tde)
|
2629
|
-
{
|
2266
|
+
static int mtde_doc_num(FrtTermDocEnum *tde) {
|
2630
2267
|
CHECK_CURR_TDE("doc_num");
|
2631
2268
|
return MTDE(tde)->base + MTDE(tde)->curr_tde->doc_num(MTDE(tde)->curr_tde);
|
2632
2269
|
}
|
2633
2270
|
|
2634
|
-
static int mtde_freq(FrtTermDocEnum *tde)
|
2635
|
-
{
|
2271
|
+
static int mtde_freq(FrtTermDocEnum *tde) {
|
2636
2272
|
CHECK_CURR_TDE("freq");
|
2637
2273
|
return MTDE(tde)->curr_tde->freq(MTDE(tde)->curr_tde);
|
2638
2274
|
}
|
2639
2275
|
|
2640
|
-
static bool mtde_next(FrtTermDocEnum *tde)
|
2641
|
-
{
|
2276
|
+
static bool mtde_next(FrtTermDocEnum *tde) {
|
2642
2277
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2643
2278
|
if (NULL != mtde->curr_tde && mtde->curr_tde->next(mtde->curr_tde)) {
|
2644
2279
|
return true;
|
2645
|
-
}
|
2646
|
-
else if (mtde_next_tde(mtde)) {
|
2280
|
+
} else if (mtde_next_tde(mtde)) {
|
2647
2281
|
return mtde_next(tde);
|
2648
|
-
}
|
2649
|
-
else {
|
2282
|
+
} else {
|
2650
2283
|
return false;
|
2651
2284
|
}
|
2652
2285
|
}
|
2653
2286
|
|
2654
|
-
static int mtde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
|
2655
|
-
{
|
2287
|
+
static int mtde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num) {
|
2656
2288
|
int i, end = 0, last_end = 0, b;
|
2657
2289
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2658
2290
|
while (true) {
|
@@ -2661,24 +2293,21 @@ static int mtde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
|
|
2661
2293
|
freqs + last_end, req_num - last_end);
|
2662
2294
|
if (end == last_end) { /* none left in segment */
|
2663
2295
|
if (!mtde_next_tde(mtde)) return end;
|
2664
|
-
}
|
2665
|
-
else { /* got some */
|
2296
|
+
} else { /* got some */
|
2666
2297
|
b = mtde->base; /* adjust doc numbers */
|
2667
2298
|
for (i = last_end; i < end; i++) {
|
2668
2299
|
docs[i] += b;
|
2669
2300
|
}
|
2670
2301
|
if (end == req_num) {
|
2671
2302
|
return end;
|
2672
|
-
}
|
2673
|
-
else {
|
2303
|
+
} else {
|
2674
2304
|
last_end = end;
|
2675
2305
|
}
|
2676
2306
|
}
|
2677
2307
|
}
|
2678
2308
|
}
|
2679
2309
|
|
2680
|
-
static bool mtde_skip_to(FrtTermDocEnum *tde, int target_doc_num)
|
2681
|
-
{
|
2310
|
+
static bool mtde_skip_to(FrtTermDocEnum *tde, int target_doc_num) {
|
2682
2311
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2683
2312
|
FrtTermDocEnum *curr_tde;
|
2684
2313
|
while (NULL != (curr_tde = mtde->curr_tde)) {
|
@@ -2692,8 +2321,7 @@ static bool mtde_skip_to(FrtTermDocEnum *tde, int target_doc_num)
|
|
2692
2321
|
return false;
|
2693
2322
|
}
|
2694
2323
|
|
2695
|
-
static void mtde_close(FrtTermDocEnum *tde)
|
2696
|
-
{
|
2324
|
+
static void mtde_close(FrtTermDocEnum *tde) {
|
2697
2325
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2698
2326
|
FrtTermDocEnum *tmp_tde;
|
2699
2327
|
int i = mtde->ir_cnt;
|
@@ -2708,8 +2336,7 @@ static void mtde_close(FrtTermDocEnum *tde)
|
|
2708
2336
|
free(tde);
|
2709
2337
|
}
|
2710
2338
|
|
2711
|
-
static FrtTermDocEnum *mtxe_new(FrtMultiReader *mr)
|
2712
|
-
{
|
2339
|
+
static FrtTermDocEnum *mtxe_new(FrtMultiReader *mr) {
|
2713
2340
|
MultiTermDocEnum *mtde = FRT_ALLOC_AND_ZERO(MultiTermDocEnum);
|
2714
2341
|
FrtTermDocEnum *tde = TDE(mtde);
|
2715
2342
|
tde->seek = &mtde_seek;
|
@@ -2731,8 +2358,7 @@ static FrtTermDocEnum *mtxe_new(FrtMultiReader *mr)
|
|
2731
2358
|
return tde;
|
2732
2359
|
}
|
2733
2360
|
|
2734
|
-
static FrtTermDocEnum *mtde_new(FrtMultiReader *mr)
|
2735
|
-
{
|
2361
|
+
static FrtTermDocEnum *mtde_new(FrtMultiReader *mr) {
|
2736
2362
|
int i;
|
2737
2363
|
FrtTermDocEnum *tde = mtxe_new(mr);
|
2738
2364
|
tde->next_position = NULL;
|
@@ -2747,14 +2373,12 @@ static FrtTermDocEnum *mtde_new(FrtMultiReader *mr)
|
|
2747
2373
|
* MultiTermPosEnum
|
2748
2374
|
****************************************************************************/
|
2749
2375
|
|
2750
|
-
static int mtpe_next_position(FrtTermDocEnum *tde)
|
2751
|
-
{
|
2376
|
+
static int mtpe_next_position(FrtTermDocEnum *tde) {
|
2752
2377
|
CHECK_CURR_TDE("next_position");
|
2753
2378
|
return MTDE(tde)->curr_tde->next_position(MTDE(tde)->curr_tde);
|
2754
2379
|
}
|
2755
2380
|
|
2756
|
-
static FrtTermDocEnum *mtpe_new(FrtMultiReader *mr)
|
2757
|
-
{
|
2381
|
+
static FrtTermDocEnum *mtpe_new(FrtMultiReader *mr) {
|
2758
2382
|
int i;
|
2759
2383
|
FrtTermDocEnum *tde = mtxe_new(mr);
|
2760
2384
|
tde->next_position = &mtpe_next_position;
|
@@ -2774,8 +2398,7 @@ static FrtTermDocEnum *mtpe_new(FrtMultiReader *mr)
|
|
2774
2398
|
#define MTDPE(tde) ((MultipleTermDocPosEnum *)(tde))
|
2775
2399
|
#define MTDPE_POS_QUEUE_INIT_CAPA 8
|
2776
2400
|
|
2777
|
-
typedef struct
|
2778
|
-
{
|
2401
|
+
typedef struct {
|
2779
2402
|
FrtTermDocEnum tde;
|
2780
2403
|
int doc_num;
|
2781
2404
|
int freq;
|
@@ -2790,8 +2413,7 @@ static void tde_destroy(FrtTermDocEnum *tde) {
|
|
2790
2413
|
tde->close(tde);
|
2791
2414
|
}
|
2792
2415
|
|
2793
|
-
static void mtdpe_seek(FrtTermDocEnum *tde, int field_num, const char *term)
|
2794
|
-
{
|
2416
|
+
static void mtdpe_seek(FrtTermDocEnum *tde, int field_num, const char *term) {
|
2795
2417
|
(void)tde;
|
2796
2418
|
(void)field_num;
|
2797
2419
|
(void)term;
|
@@ -2799,18 +2421,15 @@ static void mtdpe_seek(FrtTermDocEnum *tde, int field_num, const char *term)
|
|
2799
2421
|
" the #seek operation");
|
2800
2422
|
}
|
2801
2423
|
|
2802
|
-
static int mtdpe_doc_num(FrtTermDocEnum *tde)
|
2803
|
-
{
|
2424
|
+
static int mtdpe_doc_num(FrtTermDocEnum *tde) {
|
2804
2425
|
return MTDPE(tde)->doc_num;
|
2805
2426
|
}
|
2806
2427
|
|
2807
|
-
static int mtdpe_freq(FrtTermDocEnum *tde)
|
2808
|
-
{
|
2428
|
+
static int mtdpe_freq(FrtTermDocEnum *tde) {
|
2809
2429
|
return MTDPE(tde)->freq;
|
2810
2430
|
}
|
2811
2431
|
|
2812
|
-
static bool mtdpe_next(FrtTermDocEnum *tde)
|
2813
|
-
{
|
2432
|
+
static bool mtdpe_next(FrtTermDocEnum *tde) {
|
2814
2433
|
FrtTermDocEnum *sub_tde;
|
2815
2434
|
int pos = 0, freq = 0;
|
2816
2435
|
int doc;
|
@@ -2839,8 +2458,7 @@ static bool mtdpe_next(FrtTermDocEnum *tde)
|
|
2839
2458
|
|
2840
2459
|
if (sub_tde->next(sub_tde)) {
|
2841
2460
|
frt_pq_down(mtdpe->pq);
|
2842
|
-
}
|
2843
|
-
else {
|
2461
|
+
} else {
|
2844
2462
|
sub_tde = (FrtTermDocEnum *)frt_pq_pop(mtdpe->pq);
|
2845
2463
|
sub_tde->close(sub_tde);
|
2846
2464
|
}
|
@@ -2856,13 +2474,11 @@ static bool mtdpe_next(FrtTermDocEnum *tde)
|
|
2856
2474
|
return true;
|
2857
2475
|
}
|
2858
2476
|
|
2859
|
-
static bool tdpe_less_than(FrtTermDocEnum *p1, FrtTermDocEnum *p2)
|
2860
|
-
{
|
2477
|
+
static bool tdpe_less_than(FrtTermDocEnum *p1, FrtTermDocEnum *p2) {
|
2861
2478
|
return p1->doc_num(p1) < p2->doc_num(p2);
|
2862
2479
|
}
|
2863
2480
|
|
2864
|
-
static bool mtdpe_skip_to(FrtTermDocEnum *tde, int target_doc_num)
|
2865
|
-
{
|
2481
|
+
static bool mtdpe_skip_to(FrtTermDocEnum *tde, int target_doc_num) {
|
2866
2482
|
FrtTermDocEnum *sub_tde;
|
2867
2483
|
FrtPriorityQueue *mtdpe_pq = MTDPE(tde)->pq;
|
2868
2484
|
|
@@ -2870,8 +2486,7 @@ static bool mtdpe_skip_to(FrtTermDocEnum *tde, int target_doc_num)
|
|
2870
2486
|
&& (target_doc_num > sub_tde->doc_num(sub_tde))) {
|
2871
2487
|
if (sub_tde->skip_to(sub_tde, target_doc_num)) {
|
2872
2488
|
frt_pq_down(mtdpe_pq);
|
2873
|
-
}
|
2874
|
-
else {
|
2489
|
+
} else {
|
2875
2490
|
sub_tde = (FrtTermDocEnum *)frt_pq_pop(mtdpe_pq);
|
2876
2491
|
sub_tde->close(sub_tde);
|
2877
2492
|
}
|
@@ -2879,8 +2494,7 @@ static bool mtdpe_skip_to(FrtTermDocEnum *tde, int target_doc_num)
|
|
2879
2494
|
return tde->next(tde);
|
2880
2495
|
}
|
2881
2496
|
|
2882
|
-
static int mtdpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
|
2883
|
-
{
|
2497
|
+
static int mtdpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num) {
|
2884
2498
|
(void)tde;
|
2885
2499
|
(void)docs;
|
2886
2500
|
(void)freqs;
|
@@ -2889,21 +2503,18 @@ static int mtdpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
|
|
2889
2503
|
return req_num;
|
2890
2504
|
}
|
2891
2505
|
|
2892
|
-
static int mtdpe_next_position(FrtTermDocEnum *tde)
|
2893
|
-
{
|
2506
|
+
static int mtdpe_next_position(FrtTermDocEnum *tde) {
|
2894
2507
|
return MTDPE(tde)->pos_queue[MTDPE(tde)->pos_queue_index++];
|
2895
2508
|
}
|
2896
2509
|
|
2897
|
-
static void mtdpe_close(FrtTermDocEnum *tde)
|
2898
|
-
{
|
2510
|
+
static void mtdpe_close(FrtTermDocEnum *tde) {
|
2899
2511
|
frt_pq_clear(MTDPE(tde)->pq);
|
2900
2512
|
frt_pq_destroy(MTDPE(tde)->pq);
|
2901
2513
|
free(MTDPE(tde)->pos_queue);
|
2902
2514
|
free(tde);
|
2903
2515
|
}
|
2904
2516
|
|
2905
|
-
FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms, int t_cnt)
|
2906
|
-
{
|
2517
|
+
FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms, int t_cnt) {
|
2907
2518
|
int i;
|
2908
2519
|
MultipleTermDocPosEnum *mtdpe = FRT_ALLOC_AND_ZERO(MultipleTermDocPosEnum);
|
2909
2520
|
FrtTermDocEnum *tde = TDE(mtdpe);
|
@@ -2918,8 +2529,7 @@ FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms, i
|
|
2918
2529
|
tpe->seek(tpe, field_num, terms[i]);
|
2919
2530
|
if (tpe->next(tpe)) {
|
2920
2531
|
frt_pq_push(pq, tpe);
|
2921
|
-
}
|
2922
|
-
else {
|
2532
|
+
} else {
|
2923
2533
|
tpe->close(tpe);
|
2924
2534
|
}
|
2925
2535
|
}
|
@@ -2952,26 +2562,22 @@ static void file_name_filter_init(void) {
|
|
2952
2562
|
frt_register_for_cleanup(fn_extensions, (frt_free_ft)&frt_h_destroy);
|
2953
2563
|
}
|
2954
2564
|
|
2955
|
-
bool frt_file_name_filter_is_index_file(const char *file_name, bool include_locks)
|
2956
|
-
{
|
2565
|
+
bool frt_file_name_filter_is_index_file(const char *file_name, bool include_locks) {
|
2957
2566
|
char *p = strrchr(file_name, '.');
|
2958
2567
|
if (NULL == fn_extensions) file_name_filter_init();
|
2959
2568
|
if (NULL != p) {
|
2960
2569
|
char *extension = p + 1;
|
2961
2570
|
if (NULL != frt_h_get(fn_extensions, extension)) {
|
2962
2571
|
return true;
|
2963
|
-
}
|
2964
|
-
else if ((*extension == 'f' || *extension == 's')
|
2572
|
+
} else if ((*extension == 'f' || *extension == 's')
|
2965
2573
|
&& *(extension + 1) >= '0'
|
2966
2574
|
&& *(extension + 1) <= '9') {
|
2967
2575
|
return true;
|
2968
|
-
}
|
2969
|
-
else if (include_locks && (strcmp(extension, "lck") == 0)
|
2576
|
+
} else if (include_locks && (strcmp(extension, "lck") == 0)
|
2970
2577
|
&& (strncmp(file_name, "ferret", 6) == 0)) {
|
2971
2578
|
return true;
|
2972
2579
|
}
|
2973
|
-
}
|
2974
|
-
else if (0 == strncmp(FRT_SEGMENTS_FILE_NAME, file_name,
|
2580
|
+
} else if (0 == strncmp(FRT_SEGMENTS_FILE_NAME, file_name,
|
2975
2581
|
sizeof(FRT_SEGMENTS_FILE_NAME) - 1)) {
|
2976
2582
|
return true;
|
2977
2583
|
}
|
@@ -3007,8 +2613,8 @@ void frt_deleter_queue_file(FrtDeleter *dlr, const char *file_name) {
|
|
3007
2613
|
void frt_deleter_delete_file(FrtDeleter *dlr, char *file_name) {
|
3008
2614
|
FrtStore *store = dlr->store;
|
3009
2615
|
FRT_TRY
|
3010
|
-
if (store->exists(store, file_name)) {
|
3011
|
-
store->remove(store, file_name);
|
2616
|
+
if (store->exists(store, segm_idx_name, file_name)) {
|
2617
|
+
store->remove(store, segm_idx_name, file_name);
|
3012
2618
|
}
|
3013
2619
|
frt_hs_del(dlr->pending, file_name);
|
3014
2620
|
FRT_XCATCHALL
|
@@ -3123,20 +2729,18 @@ void frt_deleter_find_deletable_files(FrtDeleter *dlr) {
|
|
3123
2729
|
* info: */
|
3124
2730
|
frt_sis_curr_seg_file_name(dfa.curr_seg_file_name, store);
|
3125
2731
|
|
3126
|
-
store->each(store, &frt_deleter_find_deletable_files_i, &dfa);
|
2732
|
+
store->each(store, segm_idx_name, &frt_deleter_find_deletable_files_i, &dfa);
|
3127
2733
|
frt_h_destroy(dfa.current);
|
3128
2734
|
}
|
3129
2735
|
|
3130
|
-
static void deleter_delete_deletable_files(FrtDeleter *dlr)
|
3131
|
-
{
|
2736
|
+
static void deleter_delete_deletable_files(FrtDeleter *dlr) {
|
3132
2737
|
frt_deleter_find_deletable_files(dlr);
|
3133
2738
|
deleter_commit_pending_deletions(dlr);
|
3134
2739
|
}
|
3135
2740
|
|
3136
2741
|
/*
|
3137
2742
|
TODO: currently not used. Why not?
|
3138
|
-
static void deleter_clear_pending_deletions(FrtDeleter *dlr)
|
3139
|
-
{
|
2743
|
+
static void deleter_clear_pending_deletions(FrtDeleter *dlr) {
|
3140
2744
|
frt_hs_clear(dlr->pending);
|
3141
2745
|
}
|
3142
2746
|
*/
|
@@ -3147,14 +2751,12 @@ static void deleter_clear_pending_deletions(FrtDeleter *dlr)
|
|
3147
2751
|
*
|
3148
2752
|
****************************************************************************/
|
3149
2753
|
|
3150
|
-
static void ir_acquire_not_necessary(FrtIndexReader *ir)
|
3151
|
-
{
|
2754
|
+
static void ir_acquire_not_necessary(FrtIndexReader *ir) {
|
3152
2755
|
(void)ir;
|
3153
2756
|
}
|
3154
2757
|
|
3155
2758
|
#define I64_PFX POSH_I64_PRINTF_PREFIX
|
3156
|
-
static void ir_acquire_write_lock(FrtIndexReader *ir)
|
3157
|
-
{
|
2759
|
+
static void ir_acquire_write_lock(FrtIndexReader *ir) {
|
3158
2760
|
if (ir->is_stale) {
|
3159
2761
|
FRT_RAISE(FRT_STATE_ERROR, "IndexReader out of date and no longer valid for "
|
3160
2762
|
"delete, undelete, or set_norm operations. To "
|
@@ -3163,7 +2765,7 @@ static void ir_acquire_write_lock(FrtIndexReader *ir)
|
|
3163
2765
|
}
|
3164
2766
|
|
3165
2767
|
if (NULL == ir->write_lock) {
|
3166
|
-
ir->write_lock = frt_open_lock(ir->store, FRT_WRITE_LOCK_NAME);
|
2768
|
+
ir->write_lock = frt_open_lock(ir->store, segm_idx_name, FRT_WRITE_LOCK_NAME);
|
3167
2769
|
if (!ir->write_lock->obtain(ir->write_lock)) {/* obtain write lock */
|
3168
2770
|
FRT_RAISE(FRT_LOCK_ERROR, "Could not obtain write lock when trying to "
|
3169
2771
|
"write changes to the index. Check that there "
|
@@ -3209,8 +2811,7 @@ static FrtIndexReader *ir_setup(FrtIndexReader *ir, FrtStore *store, FrtSegmentI
|
|
3209
2811
|
ir->is_owner = is_owner;
|
3210
2812
|
if (is_owner) {
|
3211
2813
|
ir->acquire_write_lock = &ir_acquire_write_lock;
|
3212
|
-
}
|
3213
|
-
else {
|
2814
|
+
} else {
|
3214
2815
|
ir->acquire_write_lock = &ir_acquire_not_necessary;
|
3215
2816
|
}
|
3216
2817
|
|
@@ -3221,8 +2822,7 @@ int frt_ir_doc_freq(FrtIndexReader *ir, ID field, const char *term) {
|
|
3221
2822
|
int field_num = frt_fis_get_field_num(ir->fis, field);
|
3222
2823
|
if (field_num >= 0) {
|
3223
2824
|
return ir->doc_freq(ir, field_num, term);
|
3224
|
-
}
|
3225
|
-
else {
|
2825
|
+
} else {
|
3226
2826
|
return 0;
|
3227
2827
|
}
|
3228
2828
|
}
|
@@ -3242,8 +2842,7 @@ void frt_ir_set_norm(FrtIndexReader *ir, int doc_num, ID field, frt_uchar val) {
|
|
3242
2842
|
}
|
3243
2843
|
}
|
3244
2844
|
|
3245
|
-
frt_uchar *frt_ir_get_norms_i(FrtIndexReader *ir, int field_num)
|
3246
|
-
{
|
2845
|
+
frt_uchar *frt_ir_get_norms_i(FrtIndexReader *ir, int field_num) {
|
3247
2846
|
frt_uchar *norms = NULL;
|
3248
2847
|
if (field_num >= 0) {
|
3249
2848
|
norms = ir->get_norms(ir, field_num);
|
@@ -3266,15 +2865,13 @@ frt_uchar *frt_ir_get_norms_into(FrtIndexReader *ir, ID field, frt_uchar *buf) {
|
|
3266
2865
|
int field_num = frt_fis_get_field_num(ir->fis, field);
|
3267
2866
|
if (field_num >= 0) {
|
3268
2867
|
ir->get_norms_into(ir, field_num, buf);
|
3269
|
-
}
|
3270
|
-
else {
|
2868
|
+
} else {
|
3271
2869
|
memset(buf, 0, ir->max_doc(ir));
|
3272
2870
|
}
|
3273
2871
|
return buf;
|
3274
2872
|
}
|
3275
2873
|
|
3276
|
-
void frt_ir_undelete_all(FrtIndexReader *ir)
|
3277
|
-
{
|
2874
|
+
void frt_ir_undelete_all(FrtIndexReader *ir) {
|
3278
2875
|
pthread_mutex_lock(&ir->mutex);
|
3279
2876
|
ir->acquire_write_lock(ir);
|
3280
2877
|
ir->undelete_all_i(ir);
|
@@ -3282,8 +2879,7 @@ void frt_ir_undelete_all(FrtIndexReader *ir)
|
|
3282
2879
|
pthread_mutex_unlock(&ir->mutex);
|
3283
2880
|
}
|
3284
2881
|
|
3285
|
-
void frt_ir_delete_doc(FrtIndexReader *ir, int doc_num)
|
3286
|
-
{
|
2882
|
+
void frt_ir_delete_doc(FrtIndexReader *ir, int doc_num) {
|
3287
2883
|
if (doc_num >= 0 && doc_num < ir->max_doc(ir)) {
|
3288
2884
|
pthread_mutex_lock(&ir->mutex);
|
3289
2885
|
ir->acquire_write_lock(ir);
|
@@ -3342,8 +2938,7 @@ FrtTermDocEnum *frt_ir_term_positions_for(FrtIndexReader *ir, ID field, const ch
|
|
3342
2938
|
return tde;
|
3343
2939
|
}
|
3344
2940
|
|
3345
|
-
static void ir_commit_i(FrtIndexReader *ir)
|
3346
|
-
{
|
2941
|
+
static void ir_commit_i(FrtIndexReader *ir) {
|
3347
2942
|
if (ir->has_changes) {
|
3348
2943
|
if (NULL == ir->deleter && NULL != ir->store) {
|
3349
2944
|
/* In the MultiReader case, we share this deleter across all
|
@@ -3369,16 +2964,14 @@ static void ir_commit_i(FrtIndexReader *ir)
|
|
3369
2964
|
frt_close_lock(ir->write_lock);
|
3370
2965
|
ir->write_lock = NULL;
|
3371
2966
|
}
|
3372
|
-
}
|
3373
|
-
else {
|
2967
|
+
} else {
|
3374
2968
|
ir->commit_i(ir);
|
3375
2969
|
}
|
3376
2970
|
}
|
3377
2971
|
ir->has_changes = false;
|
3378
2972
|
}
|
3379
2973
|
|
3380
|
-
void frt_ir_commit(FrtIndexReader *ir)
|
3381
|
-
{
|
2974
|
+
void frt_ir_commit(FrtIndexReader *ir) {
|
3382
2975
|
pthread_mutex_lock(&ir->mutex);
|
3383
2976
|
ir_commit_i(ir);
|
3384
2977
|
pthread_mutex_unlock(&ir->mutex);
|
@@ -3411,15 +3004,13 @@ void frt_ir_close(FrtIndexReader *ir) {
|
|
3411
3004
|
/**
|
3412
3005
|
* Don't call this method if the cache already exists
|
3413
3006
|
**/
|
3414
|
-
void frt_ir_add_cache(FrtIndexReader *ir)
|
3415
|
-
{
|
3007
|
+
void frt_ir_add_cache(FrtIndexReader *ir) {
|
3416
3008
|
if (NULL == ir->cache) {
|
3417
3009
|
ir->cache = frt_co_hash_create();
|
3418
3010
|
}
|
3419
3011
|
}
|
3420
3012
|
|
3421
|
-
bool frt_ir_is_latest(FrtIndexReader *ir)
|
3422
|
-
{
|
3013
|
+
bool frt_ir_is_latest(FrtIndexReader *ir) {
|
3423
3014
|
return ir->is_latest_i(ir);
|
3424
3015
|
}
|
3425
3016
|
|
@@ -3434,8 +3025,7 @@ typedef struct Norm {
|
|
3434
3025
|
bool is_dirty : 1;
|
3435
3026
|
} Norm;
|
3436
3027
|
|
3437
|
-
static Norm *norm_create(FrtInStream *is, int field_num)
|
3438
|
-
{
|
3028
|
+
static Norm *norm_create(FrtInStream *is, int field_num) {
|
3439
3029
|
Norm *norm = FRT_ALLOC(Norm);
|
3440
3030
|
|
3441
3031
|
norm->is = is;
|
@@ -3447,8 +3037,7 @@ static Norm *norm_create(FrtInStream *is, int field_num)
|
|
3447
3037
|
return norm;
|
3448
3038
|
}
|
3449
3039
|
|
3450
|
-
static void norm_destroy(Norm *norm)
|
3451
|
-
{
|
3040
|
+
static void norm_destroy(Norm *norm) {
|
3452
3041
|
frt_is_close(norm->is);
|
3453
3042
|
if (NULL != norm->bytes) {
|
3454
3043
|
free(norm->bytes);
|
@@ -3457,8 +3046,7 @@ static void norm_destroy(Norm *norm)
|
|
3457
3046
|
}
|
3458
3047
|
|
3459
3048
|
static void norm_rewrite(Norm *norm, FrtStore *store, FrtDeleter *dlr,
|
3460
|
-
FrtSegmentInfo *si, int doc_count)
|
3461
|
-
{
|
3049
|
+
FrtSegmentInfo *si, int doc_count) {
|
3462
3050
|
FrtOutStream *os;
|
3463
3051
|
char norm_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
3464
3052
|
const int field_num = norm->field_num;
|
@@ -3468,7 +3056,7 @@ static void norm_rewrite(Norm *norm, FrtStore *store, FrtDeleter *dlr,
|
|
3468
3056
|
}
|
3469
3057
|
frt_si_advance_norm_gen(si, field_num);
|
3470
3058
|
si_norm_file_name(si, norm_file_name, field_num);
|
3471
|
-
os = store->new_output(store, norm_file_name);
|
3059
|
+
os = store->new_output(store, segm_idx_name, norm_file_name);
|
3472
3060
|
frt_os_write_bytes(os, norm->bytes, doc_count);
|
3473
3061
|
frt_os_close(os);
|
3474
3062
|
norm->is_dirty = false;
|
@@ -3492,8 +3080,7 @@ static FrtFieldsReader *sr_fr(FrtSegmentReader *sr) {
|
|
3492
3080
|
return fr;
|
3493
3081
|
}
|
3494
3082
|
|
3495
|
-
static bool sr_is_deleted_i(FrtSegmentReader *sr, int doc_num)
|
3496
|
-
{
|
3083
|
+
static bool sr_is_deleted_i(FrtSegmentReader *sr, int doc_num) {
|
3497
3084
|
return (NULL != sr->deleted_docs && frt_bv_get(sr->deleted_docs, doc_num));
|
3498
3085
|
}
|
3499
3086
|
|
@@ -3561,10 +3148,9 @@ static void sr_set_deleter_i(FrtIndexReader *ir, FrtDeleter *deleter) {
|
|
3561
3148
|
ir->deleter = deleter;
|
3562
3149
|
}
|
3563
3150
|
|
3564
|
-
static void bv_write(FrtBitVector *bv, FrtStore *store, char *name)
|
3565
|
-
{
|
3151
|
+
static void bv_write(FrtBitVector *bv, FrtStore *store, char *name) {
|
3566
3152
|
int i;
|
3567
|
-
FrtOutStream *os = store->new_output(store, name);
|
3153
|
+
FrtOutStream *os = store->new_output(store, segm_idx_name, name);
|
3568
3154
|
frt_os_write_vint(os, bv->size);
|
3569
3155
|
for (i = ((bv->size-1) >> 5); i >= 0; i--) {
|
3570
3156
|
frt_os_write_u32(os, bv->bits[i]);
|
@@ -3572,11 +3158,10 @@ static void bv_write(FrtBitVector *bv, FrtStore *store, char *name)
|
|
3572
3158
|
frt_os_close(os);
|
3573
3159
|
}
|
3574
3160
|
|
3575
|
-
static FrtBitVector *bv_read(FrtStore *store, char *name)
|
3576
|
-
{
|
3161
|
+
static FrtBitVector *bv_read(FrtStore *store, char *name) {
|
3577
3162
|
int i;
|
3578
3163
|
volatile bool success = false;
|
3579
|
-
FrtInStream *volatile is = store->open_input(store, name);
|
3164
|
+
FrtInStream *volatile is = store->open_input(store, segm_idx_name, name);
|
3580
3165
|
FrtBitVector *volatile bv = FRT_ALLOC_AND_ZERO(FrtBitVector);
|
3581
3166
|
bv->size = (int)frt_is_read_vint(is);
|
3582
3167
|
bv->capa = (bv->size >> 5) + 1;
|
@@ -3595,13 +3180,11 @@ static FrtBitVector *bv_read(FrtStore *store, char *name)
|
|
3595
3180
|
return bv;
|
3596
3181
|
}
|
3597
3182
|
|
3598
|
-
static bool sr_is_latest_i(FrtIndexReader *ir)
|
3599
|
-
{
|
3183
|
+
static bool sr_is_latest_i(FrtIndexReader *ir) {
|
3600
3184
|
return (frt_sis_read_current_version(ir->store) == ir->sis->version);
|
3601
3185
|
}
|
3602
3186
|
|
3603
|
-
static void sr_commit_i(FrtIndexReader *ir)
|
3604
|
-
{
|
3187
|
+
static void sr_commit_i(FrtIndexReader *ir) {
|
3605
3188
|
FrtSegmentInfo *si = SR(ir)->si;
|
3606
3189
|
char *segment = SR(ir)->si->name;
|
3607
3190
|
char tmp_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
@@ -3614,8 +3197,7 @@ static void sr_commit_i(FrtIndexReader *ir)
|
|
3614
3197
|
if (SR(ir)->undelete_all) {
|
3615
3198
|
si->del_gen = -1;
|
3616
3199
|
SR(ir)->undelete_all = false;
|
3617
|
-
}
|
3618
|
-
else {
|
3200
|
+
} else {
|
3619
3201
|
/* (SR(ir)->deleted_docs_dirty) re-write deleted */
|
3620
3202
|
si->del_gen++;
|
3621
3203
|
frt_fn_for_generation(tmp_file_name, segment, "del", si->del_gen);
|
@@ -3658,8 +3240,7 @@ static void sr_close_i(FrtIndexReader *ir) {
|
|
3658
3240
|
}
|
3659
3241
|
}
|
3660
3242
|
|
3661
|
-
static int sr_num_docs(FrtIndexReader *ir)
|
3662
|
-
{
|
3243
|
+
static int sr_num_docs(FrtIndexReader *ir) {
|
3663
3244
|
int num_docs;
|
3664
3245
|
|
3665
3246
|
pthread_mutex_lock(&ir->mutex);
|
@@ -3671,13 +3252,11 @@ static int sr_num_docs(FrtIndexReader *ir)
|
|
3671
3252
|
return num_docs;
|
3672
3253
|
}
|
3673
3254
|
|
3674
|
-
static int sr_max_doc(FrtIndexReader *ir)
|
3675
|
-
{
|
3255
|
+
static int sr_max_doc(FrtIndexReader *ir) {
|
3676
3256
|
return SR(ir)->fr->size;
|
3677
3257
|
}
|
3678
3258
|
|
3679
|
-
static FrtDocument *sr_get_doc(FrtIndexReader *ir, int doc_num)
|
3680
|
-
{
|
3259
|
+
static FrtDocument *sr_get_doc(FrtIndexReader *ir, int doc_num) {
|
3681
3260
|
FrtDocument *doc;
|
3682
3261
|
pthread_mutex_lock(&ir->mutex);
|
3683
3262
|
if (sr_is_deleted_i(SR(ir), doc_num)) {
|
@@ -3689,8 +3268,7 @@ static FrtDocument *sr_get_doc(FrtIndexReader *ir, int doc_num)
|
|
3689
3268
|
return doc;
|
3690
3269
|
}
|
3691
3270
|
|
3692
|
-
static FrtLazyDoc *sr_get_lazy_doc(FrtIndexReader *ir, int doc_num)
|
3693
|
-
{
|
3271
|
+
static FrtLazyDoc *sr_get_lazy_doc(FrtIndexReader *ir, int doc_num) {
|
3694
3272
|
FrtLazyDoc *lazy_doc;
|
3695
3273
|
pthread_mutex_lock(&ir->mutex);
|
3696
3274
|
if (sr_is_deleted_i(SR(ir), doc_num)) {
|
@@ -3702,8 +3280,7 @@ static FrtLazyDoc *sr_get_lazy_doc(FrtIndexReader *ir, int doc_num)
|
|
3702
3280
|
return lazy_doc;
|
3703
3281
|
}
|
3704
3282
|
|
3705
|
-
static frt_uchar *sr_get_norms(FrtIndexReader *ir, int field_num)
|
3706
|
-
{
|
3283
|
+
static frt_uchar *sr_get_norms(FrtIndexReader *ir, int field_num) {
|
3707
3284
|
frt_uchar *norms;
|
3708
3285
|
pthread_mutex_lock(&ir->mutex);
|
3709
3286
|
norms = sr_get_norms_i(SR(ir), field_num);
|
@@ -3712,23 +3289,20 @@ static frt_uchar *sr_get_norms(FrtIndexReader *ir, int field_num)
|
|
3712
3289
|
}
|
3713
3290
|
|
3714
3291
|
static frt_uchar *sr_get_norms_into(FrtIndexReader *ir, int field_num,
|
3715
|
-
frt_uchar *buf)
|
3716
|
-
{
|
3292
|
+
frt_uchar *buf) {
|
3717
3293
|
pthread_mutex_lock(&ir->mutex);
|
3718
3294
|
sr_get_norms_into_i(SR(ir), field_num, buf);
|
3719
3295
|
pthread_mutex_unlock(&ir->mutex);
|
3720
3296
|
return buf;
|
3721
3297
|
}
|
3722
3298
|
|
3723
|
-
static FrtTermEnum *sr_terms(FrtIndexReader *ir, int field_num)
|
3724
|
-
{
|
3299
|
+
static FrtTermEnum *sr_terms(FrtIndexReader *ir, int field_num) {
|
3725
3300
|
FrtTermEnum *te = SR(ir)->tir->orig_te;
|
3726
3301
|
te = frt_ste_clone(te);
|
3727
3302
|
return ste_set_field(te, field_num);
|
3728
3303
|
}
|
3729
3304
|
|
3730
|
-
static FrtTermEnum *sr_terms_from(FrtIndexReader *ir, int field_num, const char *term)
|
3731
|
-
{
|
3305
|
+
static FrtTermEnum *sr_terms_from(FrtIndexReader *ir, int field_num, const char *term) {
|
3732
3306
|
FrtTermEnum *te = SR(ir)->tir->orig_te;
|
3733
3307
|
te = frt_ste_clone(te);
|
3734
3308
|
ste_set_field(te, field_num);
|
@@ -3736,20 +3310,17 @@ static FrtTermEnum *sr_terms_from(FrtIndexReader *ir, int field_num, const char
|
|
3736
3310
|
return te;
|
3737
3311
|
}
|
3738
3312
|
|
3739
|
-
static int sr_doc_freq(FrtIndexReader *ir, int field_num, const char *term)
|
3740
|
-
{
|
3313
|
+
static int sr_doc_freq(FrtIndexReader *ir, int field_num, const char *term) {
|
3741
3314
|
FrtTermInfo *ti = frt_tir_get_ti(frt_tir_set_field(SR(ir)->tir, field_num), term);
|
3742
3315
|
return ti ? ti->doc_freq : 0;
|
3743
3316
|
}
|
3744
3317
|
|
3745
|
-
static FrtTermDocEnum *sr_term_docs(FrtIndexReader *ir)
|
3746
|
-
{
|
3318
|
+
static FrtTermDocEnum *sr_term_docs(FrtIndexReader *ir) {
|
3747
3319
|
return frt_stde_new(SR(ir)->tir, SR(ir)->frq_in, SR(ir)->deleted_docs,
|
3748
3320
|
STE(SR(ir)->tir->orig_te)->skip_interval);
|
3749
3321
|
}
|
3750
3322
|
|
3751
|
-
static FrtTermDocEnum *sr_term_positions(FrtIndexReader *ir)
|
3752
|
-
{
|
3323
|
+
static FrtTermDocEnum *sr_term_positions(FrtIndexReader *ir) {
|
3753
3324
|
FrtSegmentReader *sr = SR(ir);
|
3754
3325
|
return frt_stpe_new(sr->tir, sr->frq_in, sr->prx_in, sr->deleted_docs,
|
3755
3326
|
STE(sr->tir->orig_te)->skip_interval);
|
@@ -3766,8 +3337,7 @@ static FrtTermVector *sr_term_vector(FrtIndexReader *ir, int doc_num, ID field)
|
|
3766
3337
|
return frt_fr_get_field_tv(fr, doc_num, fi->number);
|
3767
3338
|
}
|
3768
3339
|
|
3769
|
-
static FrtHash *sr_term_vectors(FrtIndexReader *ir, int doc_num)
|
3770
|
-
{
|
3340
|
+
static FrtHash *sr_term_vectors(FrtIndexReader *ir, int doc_num) {
|
3771
3341
|
FrtFieldsReader *fr;
|
3772
3342
|
if (!SR(ir)->fr || NULL == (fr = sr_fr(SR(ir)))) {
|
3773
3343
|
return NULL;
|
@@ -3776,8 +3346,7 @@ static FrtHash *sr_term_vectors(FrtIndexReader *ir, int doc_num)
|
|
3776
3346
|
return frt_fr_get_tv(fr, doc_num);
|
3777
3347
|
}
|
3778
3348
|
|
3779
|
-
static bool sr_is_deleted(FrtIndexReader *ir, int doc_num)
|
3780
|
-
{
|
3349
|
+
static bool sr_is_deleted(FrtIndexReader *ir, int doc_num) {
|
3781
3350
|
bool is_del;
|
3782
3351
|
|
3783
3352
|
pthread_mutex_lock(&ir->mutex);
|
@@ -3787,13 +3356,11 @@ static bool sr_is_deleted(FrtIndexReader *ir, int doc_num)
|
|
3787
3356
|
return is_del;
|
3788
3357
|
}
|
3789
3358
|
|
3790
|
-
static bool sr_has_deletions(FrtIndexReader *ir)
|
3791
|
-
{
|
3359
|
+
static bool sr_has_deletions(FrtIndexReader *ir) {
|
3792
3360
|
return NULL != SR(ir)->deleted_docs;
|
3793
3361
|
}
|
3794
3362
|
|
3795
|
-
static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store)
|
3796
|
-
{
|
3363
|
+
static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store) {
|
3797
3364
|
int i;
|
3798
3365
|
FrtSegmentInfo *si = SR(ir)->si;
|
3799
3366
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
@@ -3801,7 +3368,7 @@ static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store)
|
|
3801
3368
|
for (i = si->norm_gens_size - 1; i >= 0; i--) {
|
3802
3369
|
FrtStore *store = ir->store;
|
3803
3370
|
if (si_norm_file_name(si, file_name, i)) {
|
3804
|
-
FrtInStream *is = store->open_input(store, file_name);
|
3371
|
+
FrtInStream *is = store->open_input(store, segm_idx_name, file_name);
|
3805
3372
|
FRT_DEREF(is);
|
3806
3373
|
frt_h_set_int(SR(ir)->norms, i, norm_create(is, i));
|
3807
3374
|
}
|
@@ -3809,8 +3376,7 @@ static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store)
|
|
3809
3376
|
SR(ir)->norms_dirty = false;
|
3810
3377
|
}
|
3811
3378
|
|
3812
|
-
static FrtIndexReader *sr_setup_i(FrtSegmentReader *sr)
|
3813
|
-
{
|
3379
|
+
static FrtIndexReader *sr_setup_i(FrtSegmentReader *sr) {
|
3814
3380
|
FrtStore *volatile store = sr->si->store;
|
3815
3381
|
FrtIndexReader *ir = IR(sr);
|
3816
3382
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
@@ -3859,12 +3425,12 @@ static FrtIndexReader *sr_setup_i(FrtSegmentReader *sr)
|
|
3859
3425
|
}
|
3860
3426
|
|
3861
3427
|
sprintf(file_name, "%s.frq", sr_segment);
|
3862
|
-
sr->frq_in = store->open_input(store, file_name);
|
3428
|
+
sr->frq_in = store->open_input(store, segm_idx_name, file_name);
|
3863
3429
|
sprintf(file_name, "%s.prx", sr_segment);
|
3864
|
-
sr->prx_in = store->open_input(store, file_name);
|
3430
|
+
sr->prx_in = store->open_input(store, segm_idx_name, file_name);
|
3865
3431
|
sr->norms = frt_h_new_int((frt_free_ft)&norm_destroy);
|
3866
3432
|
sr_open_norms(ir, store);
|
3867
|
-
if (
|
3433
|
+
if (frt_fis_has_vectors(ir->fis)) {
|
3868
3434
|
frb_thread_key_create(&sr->thread_fr, NULL);
|
3869
3435
|
sr->fr_bucket = frt_ary_new();
|
3870
3436
|
}
|
@@ -3894,8 +3460,7 @@ static FrtIndexReader *sr_open(FrtSegmentInfos *sis, FrtFieldInfos *fis, int si_
|
|
3894
3460
|
|
3895
3461
|
#define MR(ir) ((FrtMultiReader *)(ir))
|
3896
3462
|
|
3897
|
-
static int mr_reader_index_i(FrtMultiReader *mr, int doc_num)
|
3898
|
-
{
|
3463
|
+
static int mr_reader_index_i(FrtMultiReader *mr, int doc_num) {
|
3899
3464
|
int lo = 0; /* search @starts array */
|
3900
3465
|
int hi = mr->r_cnt - 1; /* for first element less */
|
3901
3466
|
int mid;
|
@@ -3906,11 +3471,9 @@ static int mr_reader_index_i(FrtMultiReader *mr, int doc_num)
|
|
3906
3471
|
mid_value = mr->starts[mid];
|
3907
3472
|
if (doc_num < mid_value) {
|
3908
3473
|
hi = mid - 1;
|
3909
|
-
}
|
3910
|
-
else if (doc_num > mid_value) {
|
3474
|
+
} else if (doc_num > mid_value) {
|
3911
3475
|
lo = mid + 1;
|
3912
|
-
}
|
3913
|
-
else { /* found a match */
|
3476
|
+
} else { /* found a match */
|
3914
3477
|
while ((mid+1 < mr->r_cnt) && (mr->starts[mid+1] == mid_value)) {
|
3915
3478
|
mid += 1; /* scan to last match in case we have empty segments */
|
3916
3479
|
}
|
@@ -3920,8 +3483,7 @@ static int mr_reader_index_i(FrtMultiReader *mr, int doc_num)
|
|
3920
3483
|
return hi;
|
3921
3484
|
}
|
3922
3485
|
|
3923
|
-
static int mr_num_docs(FrtIndexReader *ir)
|
3924
|
-
{
|
3486
|
+
static int mr_num_docs(FrtIndexReader *ir) {
|
3925
3487
|
int i, num_docs;
|
3926
3488
|
pthread_mutex_lock(&ir->mutex);
|
3927
3489
|
if (MR(ir)->num_docs_cache == -1) {
|
@@ -3938,8 +3500,7 @@ static int mr_num_docs(FrtIndexReader *ir)
|
|
3938
3500
|
return num_docs;
|
3939
3501
|
}
|
3940
3502
|
|
3941
|
-
static int mr_max_doc(FrtIndexReader *ir)
|
3942
|
-
{
|
3503
|
+
static int mr_max_doc(FrtIndexReader *ir) {
|
3943
3504
|
return MR(ir)->max_doc;
|
3944
3505
|
}
|
3945
3506
|
|
@@ -3947,30 +3508,25 @@ static int mr_max_doc(FrtIndexReader *ir)
|
|
3947
3508
|
int i = mr_reader_index_i(MR(ir), doc_num);\
|
3948
3509
|
FrtIndexReader *reader = MR(ir)->sub_readers[i]
|
3949
3510
|
|
3950
|
-
static FrtDocument *mr_get_doc(FrtIndexReader *ir, int doc_num)
|
3951
|
-
{
|
3511
|
+
static FrtDocument *mr_get_doc(FrtIndexReader *ir, int doc_num) {
|
3952
3512
|
GET_READER();
|
3953
3513
|
return reader->get_doc(reader, doc_num - MR(ir)->starts[i]);
|
3954
3514
|
}
|
3955
3515
|
|
3956
|
-
static FrtLazyDoc *mr_get_lazy_doc(FrtIndexReader *ir, int doc_num)
|
3957
|
-
{
|
3516
|
+
static FrtLazyDoc *mr_get_lazy_doc(FrtIndexReader *ir, int doc_num) {
|
3958
3517
|
GET_READER();
|
3959
3518
|
return reader->get_lazy_doc(reader, doc_num - MR(ir)->starts[i]);
|
3960
3519
|
}
|
3961
3520
|
|
3962
|
-
int frt_mr_get_field_num(FrtMultiReader *mr, int ir_num, int f_num)
|
3963
|
-
{
|
3521
|
+
int frt_mr_get_field_num(FrtMultiReader *mr, int ir_num, int f_num) {
|
3964
3522
|
if (mr->field_num_map) {
|
3965
3523
|
return mr->field_num_map[ir_num][f_num];
|
3966
|
-
}
|
3967
|
-
else {
|
3524
|
+
} else {
|
3968
3525
|
return f_num;
|
3969
3526
|
}
|
3970
3527
|
}
|
3971
3528
|
|
3972
|
-
static frt_uchar *mr_get_norms(FrtIndexReader *ir, int field_num)
|
3973
|
-
{
|
3529
|
+
static frt_uchar *mr_get_norms(FrtIndexReader *ir, int field_num) {
|
3974
3530
|
frt_uchar *bytes;
|
3975
3531
|
|
3976
3532
|
pthread_mutex_lock(&ir->mutex);
|
@@ -3995,16 +3551,14 @@ static frt_uchar *mr_get_norms(FrtIndexReader *ir, int field_num)
|
|
3995
3551
|
return bytes;
|
3996
3552
|
}
|
3997
3553
|
|
3998
|
-
static frt_uchar *mr_get_norms_into(FrtIndexReader *ir, int field_num, frt_uchar *buf)
|
3999
|
-
{
|
3554
|
+
static frt_uchar *mr_get_norms_into(FrtIndexReader *ir, int field_num, frt_uchar *buf) {
|
4000
3555
|
frt_uchar *bytes;
|
4001
3556
|
|
4002
3557
|
pthread_mutex_lock(&ir->mutex);
|
4003
3558
|
bytes = (frt_uchar *)frt_h_get_int(MR(ir)->norms_cache, field_num);
|
4004
3559
|
if (NULL != bytes) {
|
4005
3560
|
memcpy(buf, bytes, MR(ir)->max_doc);
|
4006
|
-
}
|
4007
|
-
else {
|
3561
|
+
} else {
|
4008
3562
|
int i;
|
4009
3563
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
4010
3564
|
for (i = 0; i < mr_reader_cnt; i++) {
|
@@ -4019,18 +3573,15 @@ static frt_uchar *mr_get_norms_into(FrtIndexReader *ir, int field_num, frt_uchar
|
|
4019
3573
|
return buf;
|
4020
3574
|
}
|
4021
3575
|
|
4022
|
-
static FrtTermEnum *mr_terms(FrtIndexReader *ir, int field_num)
|
4023
|
-
{
|
3576
|
+
static FrtTermEnum *mr_terms(FrtIndexReader *ir, int field_num) {
|
4024
3577
|
return frt_mte_new(MR(ir), field_num, NULL);
|
4025
3578
|
}
|
4026
3579
|
|
4027
|
-
static FrtTermEnum *mr_terms_from(FrtIndexReader *ir, int field_num, const char *term)
|
4028
|
-
{
|
3580
|
+
static FrtTermEnum *mr_terms_from(FrtIndexReader *ir, int field_num, const char *term) {
|
4029
3581
|
return frt_mte_new(MR(ir), field_num, term);
|
4030
3582
|
}
|
4031
3583
|
|
4032
|
-
static int mr_doc_freq(FrtIndexReader *ir, int field_num, const char *t)
|
4033
|
-
{
|
3584
|
+
static int mr_doc_freq(FrtIndexReader *ir, int field_num, const char *t) {
|
4034
3585
|
int total = 0; /* sum freqs in segments */
|
4035
3586
|
int i = MR(ir)->r_cnt;
|
4036
3587
|
for (i = MR(ir)->r_cnt - 1; i >= 0; i--) {
|
@@ -4043,13 +3594,11 @@ static int mr_doc_freq(FrtIndexReader *ir, int field_num, const char *t)
|
|
4043
3594
|
return total;
|
4044
3595
|
}
|
4045
3596
|
|
4046
|
-
static FrtTermDocEnum *mr_term_docs(FrtIndexReader *ir)
|
4047
|
-
{
|
3597
|
+
static FrtTermDocEnum *mr_term_docs(FrtIndexReader *ir) {
|
4048
3598
|
return mtde_new(MR(ir));
|
4049
3599
|
}
|
4050
3600
|
|
4051
|
-
static FrtTermDocEnum *mr_term_positions(FrtIndexReader *ir)
|
4052
|
-
{
|
3601
|
+
static FrtTermDocEnum *mr_term_positions(FrtIndexReader *ir) {
|
4053
3602
|
return mtpe_new(MR(ir));
|
4054
3603
|
}
|
4055
3604
|
|
@@ -4058,25 +3607,21 @@ static FrtTermVector *mr_term_vector(FrtIndexReader *ir, int doc_num, ID field)
|
|
4058
3607
|
return reader->term_vector(reader, doc_num - MR(ir)->starts[i], field);
|
4059
3608
|
}
|
4060
3609
|
|
4061
|
-
static FrtHash *mr_term_vectors(FrtIndexReader *ir, int doc_num)
|
4062
|
-
{
|
3610
|
+
static FrtHash *mr_term_vectors(FrtIndexReader *ir, int doc_num) {
|
4063
3611
|
GET_READER();
|
4064
3612
|
return reader->term_vectors(reader, doc_num - MR(ir)->starts[i]);
|
4065
3613
|
}
|
4066
3614
|
|
4067
|
-
static bool mr_is_deleted(FrtIndexReader *ir, int doc_num)
|
4068
|
-
{
|
3615
|
+
static bool mr_is_deleted(FrtIndexReader *ir, int doc_num) {
|
4069
3616
|
GET_READER();
|
4070
3617
|
return reader->is_deleted(reader, doc_num - MR(ir)->starts[i]);
|
4071
3618
|
}
|
4072
3619
|
|
4073
|
-
static bool mr_has_deletions(FrtIndexReader *ir)
|
4074
|
-
{
|
3620
|
+
static bool mr_has_deletions(FrtIndexReader *ir) {
|
4075
3621
|
return MR(ir)->has_deletions;
|
4076
3622
|
}
|
4077
3623
|
|
4078
|
-
static void mr_set_norm_i(FrtIndexReader *ir, int doc_num, int field_num, frt_uchar val)
|
4079
|
-
{
|
3624
|
+
static void mr_set_norm_i(FrtIndexReader *ir, int doc_num, int field_num, frt_uchar val) {
|
4080
3625
|
int i = mr_reader_index_i(MR(ir), doc_num);
|
4081
3626
|
int fnum = frt_mr_get_field_num(MR(ir), i, field_num);
|
4082
3627
|
if (fnum >= 0) {
|
@@ -4087,8 +3632,7 @@ static void mr_set_norm_i(FrtIndexReader *ir, int doc_num, int field_num, frt_uc
|
|
4087
3632
|
}
|
4088
3633
|
}
|
4089
3634
|
|
4090
|
-
static void mr_delete_doc_i(FrtIndexReader *ir, int doc_num)
|
4091
|
-
{
|
3635
|
+
static void mr_delete_doc_i(FrtIndexReader *ir, int doc_num) {
|
4092
3636
|
GET_READER();
|
4093
3637
|
MR(ir)->num_docs_cache = -1; /* invalidate cache */
|
4094
3638
|
|
@@ -4098,8 +3642,7 @@ static void mr_delete_doc_i(FrtIndexReader *ir, int doc_num)
|
|
4098
3642
|
ir->has_changes = true;
|
4099
3643
|
}
|
4100
3644
|
|
4101
|
-
static void mr_undelete_all_i(FrtIndexReader *ir)
|
4102
|
-
{
|
3645
|
+
static void mr_undelete_all_i(FrtIndexReader *ir) {
|
4103
3646
|
int i;
|
4104
3647
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
4105
3648
|
|
@@ -4112,8 +3655,7 @@ static void mr_undelete_all_i(FrtIndexReader *ir)
|
|
4112
3655
|
ir->has_changes = true;
|
4113
3656
|
}
|
4114
3657
|
|
4115
|
-
static void mr_set_deleter_i(FrtIndexReader *ir, FrtDeleter *deleter)
|
4116
|
-
{
|
3658
|
+
static void mr_set_deleter_i(FrtIndexReader *ir, FrtDeleter *deleter) {
|
4117
3659
|
int i;
|
4118
3660
|
ir->deleter = deleter;
|
4119
3661
|
for (i = MR(ir)->r_cnt - 1; i >= 0; i--) {
|
@@ -4122,8 +3664,7 @@ static void mr_set_deleter_i(FrtIndexReader *ir, FrtDeleter *deleter)
|
|
4122
3664
|
}
|
4123
3665
|
}
|
4124
3666
|
|
4125
|
-
static bool mr_is_latest_i(FrtIndexReader *ir)
|
4126
|
-
{
|
3667
|
+
static bool mr_is_latest_i(FrtIndexReader *ir) {
|
4127
3668
|
int i;
|
4128
3669
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
4129
3670
|
for (i = 0; i < mr_reader_cnt; i++) {
|
@@ -4134,8 +3675,7 @@ static bool mr_is_latest_i(FrtIndexReader *ir)
|
|
4134
3675
|
return true;
|
4135
3676
|
}
|
4136
3677
|
|
4137
|
-
static void mr_commit_i(FrtIndexReader *ir)
|
4138
|
-
{
|
3678
|
+
static void mr_commit_i(FrtIndexReader *ir) {
|
4139
3679
|
int i;
|
4140
3680
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
4141
3681
|
for (i = 0; i < mr_reader_cnt; i++) {
|
@@ -4144,8 +3684,7 @@ static void mr_commit_i(FrtIndexReader *ir)
|
|
4144
3684
|
}
|
4145
3685
|
}
|
4146
3686
|
|
4147
|
-
static void mr_close_i(FrtIndexReader *ir)
|
4148
|
-
{
|
3687
|
+
static void mr_close_i(FrtIndexReader *ir) {
|
4149
3688
|
int i;
|
4150
3689
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
4151
3690
|
for (i = 0; i < mr_reader_cnt; i++) {
|
@@ -4351,8 +3890,7 @@ FrtIndexReader *frt_ir_open(FrtIndexReader *ir, FrtStore *store) {
|
|
4351
3890
|
*
|
4352
3891
|
****************************************************************************/
|
4353
3892
|
|
4354
|
-
static FrtOccurence *occ_new(FrtMemoryPool *mp, int pos)
|
4355
|
-
{
|
3893
|
+
static FrtOccurence *occ_new(FrtMemoryPool *mp, int pos) {
|
4356
3894
|
FrtOccurence *occ = FRT_MP_ALLOC(mp, FrtOccurence);
|
4357
3895
|
occ->pos = pos;
|
4358
3896
|
occ->next = NULL;
|
@@ -4365,8 +3903,7 @@ static FrtOccurence *occ_new(FrtMemoryPool *mp, int pos)
|
|
4365
3903
|
*
|
4366
3904
|
****************************************************************************/
|
4367
3905
|
|
4368
|
-
FrtPosting *frt_p_new(FrtMemoryPool *mp, int doc_num, int pos)
|
4369
|
-
{
|
3906
|
+
FrtPosting *frt_p_new(FrtMemoryPool *mp, int doc_num, int pos) {
|
4370
3907
|
FrtPosting *p = FRT_MP_ALLOC(mp, FrtPosting);
|
4371
3908
|
p->doc_num = doc_num;
|
4372
3909
|
p->first_occ = occ_new(mp, pos);
|
@@ -4382,8 +3919,7 @@ FrtPosting *frt_p_new(FrtMemoryPool *mp, int doc_num, int pos)
|
|
4382
3919
|
****************************************************************************/
|
4383
3920
|
|
4384
3921
|
FrtPostingList *frt_pl_new(FrtMemoryPool *mp, const char *term,
|
4385
|
-
int term_len, FrtPosting *p)
|
4386
|
-
{
|
3922
|
+
int term_len, FrtPosting *p) {
|
4387
3923
|
// TODO account for term_len as measured in the original text vs utf8 term_len of term
|
4388
3924
|
FrtPostingList *pl = FRT_MP_ALLOC(mp, FrtPostingList);
|
4389
3925
|
pl->term = (char *)frt_mp_memdup(mp, term, term_len + 1);
|
@@ -4393,20 +3929,17 @@ FrtPostingList *frt_pl_new(FrtMemoryPool *mp, const char *term,
|
|
4393
3929
|
return pl;
|
4394
3930
|
}
|
4395
3931
|
|
4396
|
-
void frt_pl_add_occ(FrtMemoryPool *mp, FrtPostingList *pl, int pos)
|
4397
|
-
{
|
3932
|
+
void frt_pl_add_occ(FrtMemoryPool *mp, FrtPostingList *pl, int pos) {
|
4398
3933
|
pl->last_occ = pl->last_occ->next = occ_new(mp, pos);
|
4399
3934
|
pl->last->freq++;
|
4400
3935
|
}
|
4401
3936
|
|
4402
|
-
static void pl_add_posting(FrtPostingList *pl, FrtPosting *p)
|
4403
|
-
{
|
3937
|
+
static void pl_add_posting(FrtPostingList *pl, FrtPosting *p) {
|
4404
3938
|
pl->last = pl->last->next = p;
|
4405
3939
|
pl->last_occ = p->first_occ;
|
4406
3940
|
}
|
4407
3941
|
|
4408
|
-
int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2)
|
4409
|
-
{
|
3942
|
+
int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2) {
|
4410
3943
|
return strcmp((*pl1)->term, (*pl2)->term);
|
4411
3944
|
}
|
4412
3945
|
|
@@ -4416,8 +3949,7 @@ int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2)
|
|
4416
3949
|
*
|
4417
3950
|
****************************************************************************/
|
4418
3951
|
|
4419
|
-
static FrtFieldInverter *fld_inv_new(FrtDocWriter *dw, FrtFieldInfo *fi)
|
4420
|
-
{
|
3952
|
+
static FrtFieldInverter *fld_inv_new(FrtDocWriter *dw, FrtFieldInfo *fi) {
|
4421
3953
|
FrtFieldInverter *fld_inv = FRT_MP_ALLOC(dw->mp, FrtFieldInverter);
|
4422
3954
|
fld_inv->is_tokenized = bits_is_tokenized(fi->bits);
|
4423
3955
|
fld_inv->store_term_vector = bits_store_term_vector(fi->bits);
|
@@ -4434,8 +3966,7 @@ static FrtFieldInverter *fld_inv_new(FrtDocWriter *dw, FrtFieldInfo *fi)
|
|
4434
3966
|
return fld_inv;
|
4435
3967
|
}
|
4436
3968
|
|
4437
|
-
static void fld_inv_destroy(FrtFieldInverter *fld_inv)
|
4438
|
-
{
|
3969
|
+
static void fld_inv_destroy(FrtFieldInverter *fld_inv) {
|
4439
3970
|
frt_h_destroy(fld_inv->plists);
|
4440
3971
|
}
|
4441
3972
|
|
@@ -4445,8 +3976,7 @@ static void fld_inv_destroy(FrtFieldInverter *fld_inv)
|
|
4445
3976
|
*
|
4446
3977
|
****************************************************************************/
|
4447
3978
|
|
4448
|
-
typedef struct SkipBuffer
|
4449
|
-
{
|
3979
|
+
typedef struct SkipBuffer {
|
4450
3980
|
FrtOutStream *buf;
|
4451
3981
|
FrtOutStream *frq_out;
|
4452
3982
|
FrtOutStream *prx_out;
|
@@ -4455,16 +3985,14 @@ typedef struct SkipBuffer
|
|
4455
3985
|
frt_off_t last_prx_ptr;
|
4456
3986
|
} SkipBuffer;
|
4457
3987
|
|
4458
|
-
static void skip_buf_reset(SkipBuffer *skip_buf)
|
4459
|
-
{
|
3988
|
+
static void skip_buf_reset(SkipBuffer *skip_buf) {
|
4460
3989
|
frt_ramo_reset(skip_buf->buf);
|
4461
3990
|
skip_buf->last_doc = 0;
|
4462
3991
|
skip_buf->last_frq_ptr = frt_os_pos(skip_buf->frq_out);
|
4463
3992
|
skip_buf->last_prx_ptr = frt_os_pos(skip_buf->prx_out);
|
4464
3993
|
}
|
4465
3994
|
|
4466
|
-
static SkipBuffer *skip_buf_new(FrtOutStream *frq_out, FrtOutStream *prx_out)
|
4467
|
-
{
|
3995
|
+
static SkipBuffer *skip_buf_new(FrtOutStream *frq_out, FrtOutStream *prx_out) {
|
4468
3996
|
SkipBuffer *skip_buf = FRT_ALLOC(SkipBuffer);
|
4469
3997
|
skip_buf->buf = frt_ram_new_buffer();
|
4470
3998
|
skip_buf->frq_out = frq_out;
|
@@ -4472,8 +4000,7 @@ static SkipBuffer *skip_buf_new(FrtOutStream *frq_out, FrtOutStream *prx_out)
|
|
4472
4000
|
return skip_buf;
|
4473
4001
|
}
|
4474
4002
|
|
4475
|
-
static void skip_buf_add(SkipBuffer *skip_buf, int doc)
|
4476
|
-
{
|
4003
|
+
static void skip_buf_add(SkipBuffer *skip_buf, int doc) {
|
4477
4004
|
frt_off_t frq_ptr = frt_os_pos(skip_buf->frq_out);
|
4478
4005
|
frt_off_t prx_ptr = frt_os_pos(skip_buf->prx_out);
|
4479
4006
|
|
@@ -4486,15 +4013,13 @@ static void skip_buf_add(SkipBuffer *skip_buf, int doc)
|
|
4486
4013
|
skip_buf->last_prx_ptr = prx_ptr;
|
4487
4014
|
}
|
4488
4015
|
|
4489
|
-
static frt_off_t skip_buf_write(SkipBuffer *skip_buf)
|
4490
|
-
{
|
4016
|
+
static frt_off_t skip_buf_write(SkipBuffer *skip_buf) {
|
4491
4017
|
frt_off_t skip_ptr = frt_os_pos(skip_buf->frq_out);
|
4492
4018
|
frt_ramo_write_to(skip_buf->buf, skip_buf->frq_out);
|
4493
4019
|
return skip_ptr;
|
4494
4020
|
}
|
4495
4021
|
|
4496
|
-
static void skip_buf_destroy(SkipBuffer *skip_buf)
|
4497
|
-
{
|
4022
|
+
static void skip_buf_destroy(SkipBuffer *skip_buf) {
|
4498
4023
|
frt_ram_destroy_buffer(skip_buf->buf);
|
4499
4024
|
free(skip_buf);
|
4500
4025
|
}
|
@@ -4505,21 +4030,19 @@ static void skip_buf_destroy(SkipBuffer *skip_buf)
|
|
4505
4030
|
*
|
4506
4031
|
****************************************************************************/
|
4507
4032
|
|
4508
|
-
static void dw_write_norms(FrtDocWriter *dw, FrtFieldInverter *fld_inv)
|
4509
|
-
{
|
4033
|
+
static void dw_write_norms(FrtDocWriter *dw, FrtFieldInverter *fld_inv) {
|
4510
4034
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
4511
4035
|
FrtOutStream *norms_out;
|
4512
4036
|
frt_si_advance_norm_gen(dw->si, fld_inv->fi->number);
|
4513
4037
|
si_norm_file_name(dw->si, file_name, fld_inv->fi->number);
|
4514
|
-
norms_out = dw->store->new_output(dw->store, file_name);
|
4038
|
+
norms_out = dw->store->new_output(dw->store, segm_idx_name, file_name);
|
4515
4039
|
frt_os_write_bytes(norms_out, fld_inv->norms, dw->doc_num);
|
4516
4040
|
frt_os_close(norms_out);
|
4517
4041
|
}
|
4518
4042
|
|
4519
4043
|
/* we'll use the postings Hash's table area to sort the postings as it is
|
4520
4044
|
* going to be zeroset soon anyway */
|
4521
|
-
static FrtPostingList **dw_sort_postings(FrtHash *plists_ht)
|
4522
|
-
{
|
4045
|
+
static FrtPostingList **dw_sort_postings(FrtHash *plists_ht) {
|
4523
4046
|
int i, j;
|
4524
4047
|
FrtHashEntry *he;
|
4525
4048
|
FrtPostingList **plists = (FrtPostingList **)plists_ht->table;
|
@@ -4537,8 +4060,7 @@ static FrtPostingList **dw_sort_postings(FrtHash *plists_ht)
|
|
4537
4060
|
return plists;
|
4538
4061
|
}
|
4539
4062
|
|
4540
|
-
static void dw_flush_streams(FrtDocWriter *dw)
|
4541
|
-
{
|
4063
|
+
static void dw_flush_streams(FrtDocWriter *dw) {
|
4542
4064
|
frt_mp_reset(dw->mp);
|
4543
4065
|
frt_fw_close(dw->fw);
|
4544
4066
|
dw->fw = NULL;
|
@@ -4546,8 +4068,7 @@ static void dw_flush_streams(FrtDocWriter *dw)
|
|
4546
4068
|
dw->doc_num = 0;
|
4547
4069
|
}
|
4548
4070
|
|
4549
|
-
static void dw_flush(FrtDocWriter *dw)
|
4550
|
-
{
|
4071
|
+
static void dw_flush(FrtDocWriter *dw) {
|
4551
4072
|
int i, j, last_doc, doc_code, doc_freq, last_pos, posting_count;
|
4552
4073
|
int skip_interval = dw->skip_interval;
|
4553
4074
|
FrtFieldInfos *fis = dw->fis;
|
@@ -4565,9 +4086,9 @@ static void dw_flush(FrtDocWriter *dw)
|
|
4565
4086
|
SkipBuffer *skip_buf;
|
4566
4087
|
|
4567
4088
|
sprintf(file_name, "%s.frq", dw->si->name);
|
4568
|
-
frq_out = store->new_output(store, file_name);
|
4089
|
+
frq_out = store->new_output(store, segm_idx_name, file_name);
|
4569
4090
|
sprintf(file_name, "%s.prx", dw->si->name);
|
4570
|
-
prx_out = store->new_output(store, file_name);
|
4091
|
+
prx_out = store->new_output(store, segm_idx_name, file_name);
|
4571
4092
|
skip_buf = skip_buf_new(frq_out, prx_out);
|
4572
4093
|
|
4573
4094
|
for (i = 0; i < fields_count; i++) {
|
@@ -4601,8 +4122,7 @@ static void dw_flush(FrtDocWriter *dw)
|
|
4601
4122
|
|
4602
4123
|
if (p->freq == 1) {
|
4603
4124
|
frt_os_write_vint(frq_out, 1|doc_code);
|
4604
|
-
}
|
4605
|
-
else {
|
4125
|
+
} else {
|
4606
4126
|
frt_os_write_vint(frq_out, doc_code);
|
4607
4127
|
frt_os_write_vint(frq_out, p->freq);
|
4608
4128
|
}
|
@@ -4625,8 +4145,7 @@ static void dw_flush(FrtDocWriter *dw)
|
|
4625
4145
|
dw_flush_streams(dw);
|
4626
4146
|
}
|
4627
4147
|
|
4628
|
-
FrtDocWriter *frt_dw_open(FrtIndexWriter *iw, FrtSegmentInfo *si)
|
4629
|
-
{
|
4148
|
+
FrtDocWriter *frt_dw_open(FrtIndexWriter *iw, FrtSegmentInfo *si) {
|
4630
4149
|
FrtStore *store = iw->store;
|
4631
4150
|
FrtMemoryPool *mp = frt_mp_new_capa(iw->config.chunk_size,
|
4632
4151
|
iw->config.max_buffer_memory/iw->config.chunk_size);
|
@@ -4658,14 +4177,12 @@ FrtDocWriter *frt_dw_open(FrtIndexWriter *iw, FrtSegmentInfo *si)
|
|
4658
4177
|
return dw;
|
4659
4178
|
}
|
4660
4179
|
|
4661
|
-
void frt_dw_new_segment(FrtDocWriter *dw, FrtSegmentInfo *si)
|
4662
|
-
{
|
4180
|
+
void frt_dw_new_segment(FrtDocWriter *dw, FrtSegmentInfo *si) {
|
4663
4181
|
dw->fw = frt_fw_open(dw->store, si->name, dw->fis);
|
4664
4182
|
dw->si = si;
|
4665
4183
|
}
|
4666
4184
|
|
4667
|
-
void frt_dw_close(FrtDocWriter *dw)
|
4668
|
-
{
|
4185
|
+
void frt_dw_close(FrtDocWriter *dw) {
|
4669
4186
|
if (dw->doc_num) {
|
4670
4187
|
dw_flush(dw);
|
4671
4188
|
}
|
@@ -4680,8 +4197,7 @@ void frt_dw_close(FrtDocWriter *dw)
|
|
4680
4197
|
free(dw);
|
4681
4198
|
}
|
4682
4199
|
|
4683
|
-
FrtFieldInverter *frt_dw_get_fld_inv(FrtDocWriter *dw, FrtFieldInfo *fi)
|
4684
|
-
{
|
4200
|
+
FrtFieldInverter *frt_dw_get_fld_inv(FrtDocWriter *dw, FrtFieldInfo *fi) {
|
4685
4201
|
FrtFieldInverter *fld_inv = (FrtFieldInverter*)frt_h_get_int(dw->fields, fi->number);
|
4686
4202
|
|
4687
4203
|
if (!fld_inv) {
|
@@ -4697,8 +4213,7 @@ static void dw_add_posting(FrtMemoryPool *mp,
|
|
4697
4213
|
int doc_num,
|
4698
4214
|
const char *text,
|
4699
4215
|
int len,
|
4700
|
-
int pos)
|
4701
|
-
{
|
4216
|
+
int pos) {
|
4702
4217
|
FrtHashEntry *pl_he;
|
4703
4218
|
if (frt_h_set_ext(curr_plists, text, &pl_he)) {
|
4704
4219
|
FrtPosting *p = frt_p_new(mp, doc_num, pos);
|
@@ -4708,21 +4223,18 @@ static void dw_add_posting(FrtMemoryPool *mp,
|
|
4708
4223
|
if (frt_h_set_ext(fld_plists, text, &fld_pl_he)) {
|
4709
4224
|
fld_pl_he->value = pl = frt_pl_new(mp, text, len, p);
|
4710
4225
|
pl_he->key = fld_pl_he->key = (char *)pl->term;
|
4711
|
-
}
|
4712
|
-
else {
|
4226
|
+
} else {
|
4713
4227
|
pl = (FrtPostingList *)fld_pl_he->value;
|
4714
4228
|
pl_add_posting(pl, p);
|
4715
4229
|
pl_he->key = (char *)pl->term;
|
4716
4230
|
}
|
4717
4231
|
pl_he->value = pl;
|
4718
|
-
}
|
4719
|
-
else {
|
4232
|
+
} else {
|
4720
4233
|
frt_pl_add_occ(mp, (FrtPostingList *)pl_he->value, pos);
|
4721
4234
|
}
|
4722
4235
|
}
|
4723
4236
|
|
4724
|
-
static void dw_add_offsets(FrtDocWriter *dw, int pos, frt_off_t start, frt_off_t end)
|
4725
|
-
{
|
4237
|
+
static void dw_add_offsets(FrtDocWriter *dw, int pos, frt_off_t start, frt_off_t end) {
|
4726
4238
|
if (pos >= dw->offsets_capa) {
|
4727
4239
|
int old_capa = dw->offsets_capa;
|
4728
4240
|
while (pos >= dw->offsets_capa) {
|
@@ -4786,7 +4298,7 @@ FrtHash *frt_dw_invert_field(FrtDocWriter *dw, FrtFieldInverter *fld_inv, FrtDoc
|
|
4786
4298
|
buf[FRT_MAX_WORD_SIZE - 1] = '\0';
|
4787
4299
|
for (i = 0; i < df_size; i++) {
|
4788
4300
|
int len = df->lengths[i];
|
4789
|
-
char *data_ptr = df->data[i];
|
4301
|
+
const char *data_ptr = df->data[i];
|
4790
4302
|
if (len >= FRT_MAX_WORD_SIZE) {
|
4791
4303
|
char *head_last = rb_enc_left_char_head(data_ptr, data_ptr + FRT_MAX_WORD_SIZE - 1, data_ptr + len, df->encodings[i]);
|
4792
4304
|
len = head_last - data_ptr;
|
@@ -4828,7 +4340,7 @@ void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc) {
|
|
4828
4340
|
FrtFieldInverter *fld_inv;
|
4829
4341
|
FrtHash *postings;
|
4830
4342
|
FrtFieldInfo *fi;
|
4831
|
-
const int doc_size = doc->size;
|
4343
|
+
const int doc_size = doc->field_count;
|
4832
4344
|
|
4833
4345
|
/* frt_fw_add_doc will add new fields as necessary */
|
4834
4346
|
frt_fw_add_doc(dw->fw, doc);
|
@@ -4867,23 +4379,21 @@ void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc) {
|
|
4867
4379
|
****************************************************************************/
|
4868
4380
|
|
4869
4381
|
/* prepare an index ready for writing */
|
4870
|
-
void frt_index_create(FrtStore *store, FrtFieldInfos *fis)
|
4871
|
-
{
|
4382
|
+
void frt_index_create(FrtStore *store, FrtFieldInfos *fis) {
|
4872
4383
|
FrtSegmentInfos *sis = frt_sis_new(fis);
|
4873
|
-
store->clear_all(store);
|
4384
|
+
store->clear_all(store, segm_idx_name);
|
4874
4385
|
frt_sis_write(sis, store, NULL);
|
4875
4386
|
frt_sis_destroy(sis);
|
4876
4387
|
}
|
4877
4388
|
|
4878
4389
|
bool frt_index_is_locked(FrtStore *store) {
|
4879
|
-
FrtLock *write_lock = frt_open_lock(store, FRT_WRITE_LOCK_NAME);
|
4390
|
+
FrtLock *write_lock = frt_open_lock(store, segm_idx_name, FRT_WRITE_LOCK_NAME);
|
4880
4391
|
bool is_locked = write_lock->is_locked(write_lock);
|
4881
4392
|
frt_close_lock(write_lock);
|
4882
4393
|
return is_locked;
|
4883
4394
|
}
|
4884
4395
|
|
4885
|
-
int frt_iw_doc_count(FrtIndexWriter *iw)
|
4886
|
-
{
|
4396
|
+
int frt_iw_doc_count(FrtIndexWriter *iw) {
|
4887
4397
|
int i, doc_cnt = 0;
|
4888
4398
|
pthread_mutex_lock(&iw->mutex);
|
4889
4399
|
for (i = iw->sis->size - 1; i >= 0; i--) {
|
@@ -4909,13 +4419,11 @@ static void iw_flush_ram_segment(FrtIndexWriter *iw) {
|
|
4909
4419
|
pthread_mutex_unlock(&iw->store->mutex);
|
4910
4420
|
}
|
4911
4421
|
|
4912
|
-
void frt_iw_add_doc(FrtIndexWriter *iw, FrtDocument *doc)
|
4913
|
-
{
|
4422
|
+
void frt_iw_add_doc(FrtIndexWriter *iw, FrtDocument *doc) {
|
4914
4423
|
pthread_mutex_lock(&iw->mutex);
|
4915
4424
|
if (NULL == iw->dw) {
|
4916
4425
|
iw->dw = frt_dw_open(iw, frt_sis_new_segment(iw->sis, 0, iw->store));
|
4917
|
-
}
|
4918
|
-
else if (NULL == iw->dw->fw) {
|
4426
|
+
} else if (NULL == iw->dw->fw) {
|
4919
4427
|
frt_dw_new_segment(iw->dw, frt_sis_new_segment(iw->sis, 0, iw->store));
|
4920
4428
|
}
|
4921
4429
|
frt_dw_add_doc(iw->dw, doc);
|
@@ -4926,15 +4434,13 @@ void frt_iw_add_doc(FrtIndexWriter *iw, FrtDocument *doc)
|
|
4926
4434
|
pthread_mutex_unlock(&iw->mutex);
|
4927
4435
|
}
|
4928
4436
|
|
4929
|
-
static void iw_commit_i(FrtIndexWriter *iw)
|
4930
|
-
{
|
4437
|
+
static void iw_commit_i(FrtIndexWriter *iw) {
|
4931
4438
|
if (iw->dw && iw->dw->doc_num > 0) {
|
4932
4439
|
iw_flush_ram_segment(iw);
|
4933
4440
|
}
|
4934
4441
|
}
|
4935
4442
|
|
4936
|
-
void frt_iw_commit(FrtIndexWriter *iw)
|
4937
|
-
{
|
4443
|
+
void frt_iw_commit(FrtIndexWriter *iw) {
|
4938
4444
|
pthread_mutex_lock(&iw->mutex);
|
4939
4445
|
iw_commit_i(iw);
|
4940
4446
|
pthread_mutex_unlock(&iw->mutex);
|
@@ -5010,8 +4516,7 @@ void frt_iw_delete_terms(FrtIndexWriter *iw, ID field, char **terms, const int t
|
|
5010
4516
|
}
|
5011
4517
|
}
|
5012
4518
|
|
5013
|
-
void frt_iw_close(FrtIndexWriter *iw)
|
5014
|
-
{
|
4519
|
+
void frt_iw_close(FrtIndexWriter *iw) {
|
5015
4520
|
pthread_mutex_lock(&iw->mutex);
|
5016
4521
|
iw_commit_i(iw);
|
5017
4522
|
if (iw->dw) {
|
@@ -5047,7 +4552,7 @@ FrtIndexWriter *frt_iw_open(FrtIndexWriter *iw, FrtStore *store, FrtAnalyzer *vo
|
|
5047
4552
|
iw->config = *config;
|
5048
4553
|
|
5049
4554
|
FRT_TRY
|
5050
|
-
iw->write_lock = frt_open_lock(store, FRT_WRITE_LOCK_NAME);
|
4555
|
+
iw->write_lock = frt_open_lock(store, segm_idx_name, FRT_WRITE_LOCK_NAME);
|
5051
4556
|
if (!iw->write_lock->obtain(iw->write_lock)) {
|
5052
4557
|
FRT_RAISE(FRT_LOCK_ERROR, "Couldn't obtain write lock when opening IndexWriter");
|
5053
4558
|
}
|
@@ -5089,21 +4594,21 @@ static void iw_cp_fields(FrtIndexWriter *iw, FrtSegmentReader *sr, const char *s
|
|
5089
4594
|
char *sr_segment = sr->si->name;
|
5090
4595
|
|
5091
4596
|
sprintf(file_name, "%s.fdt", segment);
|
5092
|
-
fdt_out = store_out->new_output(store_out, file_name);
|
4597
|
+
fdt_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
5093
4598
|
sprintf(file_name, "%s.fdx", segment);
|
5094
|
-
fdx_out = store_out->new_output(store_out, file_name);
|
4599
|
+
fdx_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
5095
4600
|
|
5096
4601
|
sprintf(file_name, "%s.fdt", sr_segment);
|
5097
|
-
fdt_in = store_in->open_input(store_in, file_name);
|
4602
|
+
fdt_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
5098
4603
|
sprintf(file_name, "%s.fdx", sr_segment);
|
5099
|
-
fdx_in = store_in->open_input(store_in, file_name);
|
4604
|
+
fdx_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
5100
4605
|
|
5101
4606
|
sprintf(file_name, "%s.del", sr_segment);
|
5102
|
-
if (store_in->exists(store_in, file_name)) {
|
4607
|
+
if (store_in->exists(store_in, segm_idx_name, file_name)) {
|
5103
4608
|
FrtOutStream *del_out;
|
5104
|
-
FrtInStream *del_in = store_in->open_input(store_in, file_name);
|
4609
|
+
FrtInStream *del_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
5105
4610
|
sprintf(file_name, "%s.del", segment);
|
5106
|
-
del_out = store_out->new_output(store_out, file_name);
|
4611
|
+
del_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
5107
4612
|
frt_is2os_copy_bytes(del_in, del_out, frt_is_length(del_in));
|
5108
4613
|
frt_os_close(del_out);
|
5109
4614
|
frt_is_close(del_in);
|
@@ -5170,8 +4675,7 @@ static void iw_cp_fields(FrtIndexWriter *iw, FrtSegmentReader *sr, const char *s
|
|
5170
4675
|
}
|
5171
4676
|
|
5172
4677
|
static void iw_cp_terms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
5173
|
-
const char *segment, int *map)
|
5174
|
-
{
|
4678
|
+
const char *segment, int *map) {
|
5175
4679
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
5176
4680
|
FrtOutStream *tix_out, *tis_out, *tfx_out, *frq_out, *prx_out;
|
5177
4681
|
FrtInStream *tix_in, *tis_in, *tfx_in, *frq_in, *prx_in;
|
@@ -5180,29 +4684,29 @@ static void iw_cp_terms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
|
5180
4684
|
char *sr_segment = sr->si->name;
|
5181
4685
|
|
5182
4686
|
sprintf(file_name, "%s.tix", segment);
|
5183
|
-
tix_out = store_out->new_output(store_out, file_name);
|
4687
|
+
tix_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
5184
4688
|
sprintf(file_name, "%s.tix", sr_segment);
|
5185
|
-
tix_in = store_in->open_input(store_in, file_name);
|
4689
|
+
tix_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
5186
4690
|
|
5187
4691
|
sprintf(file_name, "%s.tis", segment);
|
5188
|
-
tis_out = store_out->new_output(store_out, file_name);
|
4692
|
+
tis_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
5189
4693
|
sprintf(file_name, "%s.tis", sr_segment);
|
5190
|
-
tis_in = store_in->open_input(store_in, file_name);
|
4694
|
+
tis_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
5191
4695
|
|
5192
4696
|
sprintf(file_name, "%s.tfx", segment);
|
5193
|
-
tfx_out = store_out->new_output(store_out, file_name);
|
4697
|
+
tfx_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
5194
4698
|
sprintf(file_name, "%s.tfx", sr_segment);
|
5195
|
-
tfx_in = store_in->open_input(store_in, file_name);
|
4699
|
+
tfx_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
5196
4700
|
|
5197
4701
|
sprintf(file_name, "%s.frq", segment);
|
5198
|
-
frq_out = store_out->new_output(store_out, file_name);
|
4702
|
+
frq_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
5199
4703
|
sprintf(file_name, "%s.frq", sr_segment);
|
5200
|
-
frq_in = store_in->open_input(store_in, file_name);
|
4704
|
+
frq_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
5201
4705
|
|
5202
4706
|
sprintf(file_name, "%s.prx", segment);
|
5203
|
-
prx_out = store_out->new_output(store_out, file_name);
|
4707
|
+
prx_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
5204
4708
|
sprintf(file_name, "%s.prx", sr_segment);
|
5205
|
-
prx_in = store_in->open_input(store_in, file_name);
|
4709
|
+
prx_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
5206
4710
|
|
5207
4711
|
if (map) {
|
5208
4712
|
int field_cnt = frt_is_read_u32(tfx_in);
|
@@ -5217,8 +4721,7 @@ static void iw_cp_terms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
|
5217
4721
|
frt_os_write_vint(tfx_out, frt_is_read_vint(tfx_in)); /* index size */
|
5218
4722
|
frt_os_write_vint(tfx_out, frt_is_read_vint(tfx_in)); /* dict size */
|
5219
4723
|
}
|
5220
|
-
}
|
5221
|
-
else {
|
4724
|
+
} else {
|
5222
4725
|
frt_is2os_copy_bytes(tfx_in, tfx_out, frt_is_length(tfx_in));
|
5223
4726
|
}
|
5224
4727
|
frt_is2os_copy_bytes(tix_in, tix_out, frt_is_length(tix_in));
|
@@ -5239,8 +4742,7 @@ static void iw_cp_terms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
|
5239
4742
|
}
|
5240
4743
|
|
5241
4744
|
static void iw_cp_norms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
5242
|
-
FrtSegmentInfo *si, int *map)
|
5243
|
-
{
|
4745
|
+
FrtSegmentInfo *si, int *map) {
|
5244
4746
|
int i;
|
5245
4747
|
FrtFieldInfos *fis = IR(sr)->fis;
|
5246
4748
|
const int field_cnt = fis->size;
|
@@ -5256,10 +4758,10 @@ static void iw_cp_norms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
|
5256
4758
|
FrtStore *store = IR(sr)->store;
|
5257
4759
|
int field_num = map ? map[i] : i;
|
5258
4760
|
|
5259
|
-
norms_in = store->open_input(store, file_name_in);
|
4761
|
+
norms_in = store->open_input(store, segm_idx_name, file_name_in);
|
5260
4762
|
frt_si_advance_norm_gen(si, field_num);
|
5261
4763
|
si_norm_file_name(si, file_name_out, field_num);
|
5262
|
-
norms_out = store_out->new_output(store_out, file_name_out);
|
4764
|
+
norms_out = store_out->new_output(store_out, segm_idx_name, file_name_out);
|
5263
4765
|
frt_is2os_copy_bytes(norms_in, norms_out, frt_is_length(norms_in));
|
5264
4766
|
frt_os_close(norms_out);
|
5265
4767
|
frt_is_close(norms_in);
|