isomorfeus-ferret 0.17.2 → 0.17.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/benchmark.c +9 -20
- data/ext/isomorfeus_ferret_ext/benchmarks_all.h +1 -2
- data/ext/isomorfeus_ferret_ext/bm_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +4 -2
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +3 -2
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +4 -5
- data/ext/isomorfeus_ferret_ext/frb_field_info.c +3 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +118 -125
- data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +14 -16
- data/ext/isomorfeus_ferret_ext/frb_search.c +31 -23
- data/ext/isomorfeus_ferret_ext/frb_store.c +27 -13
- data/ext/isomorfeus_ferret_ext/frb_utils.c +3 -6
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +39 -46
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_array.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +3 -6
- data/ext/isomorfeus_ferret_ext/frt_doc_field.c +87 -0
- data/ext/isomorfeus_ferret_ext/frt_doc_field.h +26 -0
- data/ext/isomorfeus_ferret_ext/frt_document.c +4 -97
- data/ext/isomorfeus_ferret_ext/frt_document.h +2 -27
- data/ext/isomorfeus_ferret_ext/frt_except.c +8 -6
- data/ext/isomorfeus_ferret_ext/frt_except.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +13 -32
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +0 -6
- data/ext/isomorfeus_ferret_ext/frt_field_info.c +69 -0
- data/ext/isomorfeus_ferret_ext/frt_field_info.h +49 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.c +196 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.h +35 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +10 -4
- data/ext/isomorfeus_ferret_ext/frt_global.h +11 -15
- data/ext/isomorfeus_ferret_ext/frt_hash.c +8 -8
- data/ext/isomorfeus_ferret_ext/frt_hash.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_hashset.c +20 -40
- data/ext/isomorfeus_ferret_ext/frt_hashset.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_helper.c +7 -15
- data/ext/isomorfeus_ferret_ext/frt_in_stream.c +35 -45
- data/ext/isomorfeus_ferret_ext/frt_in_stream.h +3 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +20 -38
- data/ext/isomorfeus_ferret_ext/frt_index.c +292 -790
- data/ext/isomorfeus_ferret_ext/frt_index.h +1 -102
- data/ext/isomorfeus_ferret_ext/frt_lang.c +5 -10
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.c +18 -25
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.h +5 -5
- data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +102 -70
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +23 -46
- data/ext/isomorfeus_ferret_ext/frt_multimapper.h +4 -8
- data/ext/isomorfeus_ferret_ext/frt_out_stream.c +31 -43
- data/ext/isomorfeus_ferret_ext/frt_out_stream.h +2 -2
- data/ext/isomorfeus_ferret_ext/frt_posh.c +6 -819
- data/ext/isomorfeus_ferret_ext/frt_posh.h +0 -57
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +85 -171
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +49 -98
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +52 -104
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +6 -12
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +113 -226
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +134 -85
- data/ext/isomorfeus_ferret_ext/frt_search.c +82 -164
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -25
- data/ext/isomorfeus_ferret_ext/frt_store.h +86 -52
- data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_win32.h +5 -10
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +12 -11
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +11 -13
- data/ext/isomorfeus_ferret_ext/lz4.c +422 -195
- data/ext/isomorfeus_ferret_ext/lz4.h +114 -46
- data/ext/isomorfeus_ferret_ext/lz4frame.c +421 -242
- data/ext/isomorfeus_ferret_ext/lz4frame.h +122 -53
- data/ext/isomorfeus_ferret_ext/lz4hc.c +127 -111
- data/ext/isomorfeus_ferret_ext/lz4hc.h +14 -14
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +1 -1
- data/ext/isomorfeus_ferret_ext/mdbx.c +3762 -2526
- data/ext/isomorfeus_ferret_ext/mdbx.h +115 -70
- data/ext/isomorfeus_ferret_ext/test.c +40 -87
- data/ext/isomorfeus_ferret_ext/test.h +3 -6
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -13
- data/ext/isomorfeus_ferret_ext/test_analysis.c +32 -64
- data/ext/isomorfeus_ferret_ext/test_array.c +6 -12
- data/ext/isomorfeus_ferret_ext/test_bitvector.c +12 -24
- data/ext/isomorfeus_ferret_ext/test_document.c +23 -33
- data/ext/isomorfeus_ferret_ext/test_except.c +10 -21
- data/ext/isomorfeus_ferret_ext/test_fields.c +62 -68
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +15 -23
- data/ext/isomorfeus_ferret_ext/test_filter.c +17 -27
- data/ext/isomorfeus_ferret_ext/test_global.c +14 -29
- data/ext/isomorfeus_ferret_ext/test_hash.c +19 -38
- data/ext/isomorfeus_ferret_ext/test_hashset.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_helper.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +16 -28
- data/ext/isomorfeus_ferret_ext/test_index.c +277 -487
- data/ext/isomorfeus_ferret_ext/test_lang.c +7 -14
- data/ext/isomorfeus_ferret_ext/test_mdbx_store.c +2 -5
- data/ext/isomorfeus_ferret_ext/test_mempool.c +5 -10
- data/ext/isomorfeus_ferret_ext/test_multimapper.c +3 -6
- data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +9 -18
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +4 -6
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +3 -4
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_q_span.c +19 -35
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +14 -13
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -109
- data/ext/isomorfeus_ferret_ext/test_segments.c +8 -13
- data/ext/isomorfeus_ferret_ext/test_similarity.c +2 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +14 -24
- data/ext/isomorfeus_ferret_ext/test_store.c +96 -115
- data/ext/isomorfeus_ferret_ext/test_term.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -14
- data/ext/isomorfeus_ferret_ext/test_test.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_threading.c +14 -20
- data/ext/isomorfeus_ferret_ext/testhelper.c +11 -21
- data/ext/isomorfeus_ferret_ext/testhelper.h +1 -1
- data/ext/isomorfeus_ferret_ext/tests_all.h +1 -2
- data/lib/isomorfeus/ferret/index/index.rb +1 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +24 -4
@@ -44,13 +44,12 @@ static char *ste_next(FrtTermEnum *te);
|
|
44
44
|
|
45
45
|
/* *** Must be three characters *** */
|
46
46
|
static const char *INDEX_EXTENSIONS[] = {
|
47
|
-
"frq", "prx", "fdx", "fdt", "tfx", "tix", "tis", "del", "gen"
|
47
|
+
"frq", "prx", "fdx", "fdt", "tfx", "tix", "tis", "del", "gen"
|
48
48
|
};
|
49
49
|
|
50
50
|
static const char BASE36_DIGITMAP[] = "0123456789abcdefghijklmnopqrstuvwxyz";
|
51
51
|
|
52
|
-
static char *u64_to_str36(char *buf, int buf_size, frt_u64 u)
|
53
|
-
{
|
52
|
+
static char *u64_to_str36(char *buf, int buf_size, frt_u64 u) {
|
54
53
|
int i = buf_size - 1;
|
55
54
|
buf[i] = '\0';
|
56
55
|
for (i--; i >= 0; i--) {
|
@@ -67,17 +66,14 @@ static char *u64_to_str36(char *buf, int buf_size, frt_u64 u)
|
|
67
66
|
return buf + i;
|
68
67
|
}
|
69
68
|
|
70
|
-
static frt_u64 str36_to_u64(char *p)
|
71
|
-
{
|
69
|
+
static frt_u64 str36_to_u64(char *p) {
|
72
70
|
frt_u64 u = 0;
|
73
71
|
while (true) {
|
74
72
|
if ('0' <= *p && '9' >= *p) {
|
75
73
|
u = u * 36 + *p - '0';
|
76
|
-
}
|
77
|
-
else if ('a' <= *p && 'z' >= *p) {
|
74
|
+
} else if ('a' <= *p && 'z' >= *p) {
|
78
75
|
u = u * 36 + *p - 'a' + 10;
|
79
|
-
}
|
80
|
-
else {
|
76
|
+
} else {
|
81
77
|
break;
|
82
78
|
}
|
83
79
|
p++;
|
@@ -134,12 +130,10 @@ static char *fn_for_gen_field(char *buf,
|
|
134
130
|
const char *base,
|
135
131
|
const char *ext,
|
136
132
|
frt_i64 gen,
|
137
|
-
int field_num)
|
138
|
-
{
|
133
|
+
int field_num) {
|
139
134
|
if (-1 == gen) {
|
140
135
|
return NULL;
|
141
|
-
}
|
142
|
-
else {
|
136
|
+
} else {
|
143
137
|
char b[FRT_SEGMENT_NAME_MAX_LENGTH];
|
144
138
|
sprintf(buf, "%s_%s.%s%d",
|
145
139
|
base,
|
@@ -156,18 +150,15 @@ static char *fn_for_gen_field(char *buf,
|
|
156
150
|
*
|
157
151
|
***************************************************************************/
|
158
152
|
|
159
|
-
static unsigned long co_hash(const void *key)
|
160
|
-
{
|
161
|
-
return (unsigned long)key;
|
153
|
+
static unsigned long co_hash(const void *key) {
|
154
|
+
return (unsigned long)(uintptr_t)key;
|
162
155
|
}
|
163
156
|
|
164
|
-
static int co_eq(const void *key1, const void *key2)
|
165
|
-
{
|
157
|
+
static int co_eq(const void *key1, const void *key2) {
|
166
158
|
return (key1 == key2);
|
167
159
|
}
|
168
160
|
|
169
|
-
static void co_destroy(FrtCacheObject *self)
|
170
|
-
{
|
161
|
+
static void co_destroy(FrtCacheObject *self) {
|
171
162
|
frt_h_rem(self->ref_tab1, self->ref2, false);
|
172
163
|
frt_h_rem(self->ref_tab2, self->ref1, false);
|
173
164
|
self->destroy(self->obj);
|
@@ -175,8 +166,7 @@ static void co_destroy(FrtCacheObject *self)
|
|
175
166
|
}
|
176
167
|
|
177
168
|
FrtCacheObject *frt_co_create(FrtHash *ref_tab1, FrtHash *ref_tab2,
|
178
|
-
void *ref1, void *ref2, frt_free_ft destroy, void *obj)
|
179
|
-
{
|
169
|
+
void *ref1, void *ref2, frt_free_ft destroy, void *obj) {
|
180
170
|
FrtCacheObject *self = FRT_ALLOC(FrtCacheObject);
|
181
171
|
frt_h_set(ref_tab1, ref2, self);
|
182
172
|
frt_h_set(ref_tab2, ref1, self);
|
@@ -193,302 +183,13 @@ FrtHash *frt_co_hash_create(void) {
|
|
193
183
|
return frt_h_new(&co_hash, &co_eq, (frt_free_ft)NULL, (frt_free_ft)&co_destroy);
|
194
184
|
}
|
195
185
|
|
196
|
-
/****************************************************************************
|
197
|
-
*
|
198
|
-
* FieldInfo
|
199
|
-
*
|
200
|
-
****************************************************************************/
|
201
|
-
|
202
|
-
static void fi_check_params(unsigned int bits) {
|
203
|
-
if (!bits_is_indexed(bits) && bits_store_term_vector(bits)) {
|
204
|
-
FRT_RAISE(FRT_ARG_ERROR, "You can't store the term vectors of an unindexed field.");
|
205
|
-
}
|
206
|
-
if (bits_is_compressed(bits) && !bits_is_stored(bits)) {
|
207
|
-
FRT_RAISE(FRT_ARG_ERROR, "Field must be stored for compression to be useful.");
|
208
|
-
}
|
209
|
-
}
|
210
|
-
|
211
|
-
FrtFieldInfo *frt_fi_alloc(void) {
|
212
|
-
return FRT_ALLOC(FrtFieldInfo);
|
213
|
-
}
|
214
|
-
|
215
|
-
FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, unsigned int bits) {
|
216
|
-
assert(NULL != name);
|
217
|
-
fi_check_params(bits);
|
218
|
-
fi->name = name;
|
219
|
-
fi->boost = 1.0f;
|
220
|
-
fi->bits = bits;
|
221
|
-
fi->number = 0;
|
222
|
-
fi->ref_cnt = 1;
|
223
|
-
fi->rfi = Qnil;
|
224
|
-
return fi;
|
225
|
-
}
|
226
|
-
|
227
|
-
FrtFieldInfo *frt_fi_new(ID name, unsigned int bits) {
|
228
|
-
FrtFieldInfo *fi = frt_fi_alloc();
|
229
|
-
return frt_fi_init(fi, name, bits);
|
230
|
-
}
|
231
|
-
|
232
|
-
void frt_fi_deref(FrtFieldInfo *fi) {
|
233
|
-
if (FRT_DEREF(fi) == 0) free(fi);
|
234
|
-
}
|
235
|
-
|
236
|
-
FrtCompressionType frt_fi_get_compression(FrtFieldInfo *fi) {
|
237
|
-
if (bits_is_compressed(fi->bits)) {
|
238
|
-
if (bits_is_compressed_brotli(fi->bits)) {
|
239
|
-
return FRT_COMPRESSION_BROTLI;
|
240
|
-
} else if (bits_is_compressed_bz2(fi->bits)) {
|
241
|
-
return FRT_COMPRESSION_BZ2;
|
242
|
-
} else if (bits_is_compressed_lz4(fi->bits)) {
|
243
|
-
return FRT_COMPRESSION_LZ4;
|
244
|
-
} else {
|
245
|
-
return FRT_COMPRESSION_BROTLI;
|
246
|
-
}
|
247
|
-
} else {
|
248
|
-
return FRT_COMPRESSION_NONE;
|
249
|
-
}
|
250
|
-
}
|
251
|
-
|
252
|
-
char *frt_fi_to_s(FrtFieldInfo *fi)
|
253
|
-
{
|
254
|
-
const char *fi_name = rb_id2name(fi->name);
|
255
|
-
char *str = FRT_ALLOC_N(char, strlen(fi_name) + 200);
|
256
|
-
char *s = str;
|
257
|
-
s += sprintf(str, "[\"%s\":(%s%s%s%s%s%s%s%s", fi_name,
|
258
|
-
bits_is_stored(fi->bits) ? "is_stored, " : "",
|
259
|
-
bits_is_compressed(fi->bits) ? "is_compressed, " : "",
|
260
|
-
bits_is_indexed(fi->bits) ? "is_indexed, " : "",
|
261
|
-
bits_is_tokenized(fi->bits) ? "is_tokenized, " : "",
|
262
|
-
bits_omit_norms(fi->bits) ? "omit_norms, " : "",
|
263
|
-
bits_store_term_vector(fi->bits) ? "store_term_vector, " : "",
|
264
|
-
bits_store_positions(fi->bits) ? "store_positions, " : "",
|
265
|
-
bits_store_offsets(fi->bits) ? "store_offsets, " : "");
|
266
|
-
s -= 2;
|
267
|
-
if (*s != ',') {
|
268
|
-
s += 2;
|
269
|
-
}
|
270
|
-
|
271
|
-
sprintf(s, ")]");
|
272
|
-
return str;
|
273
|
-
}
|
274
|
-
|
275
|
-
/****************************************************************************
|
276
|
-
*
|
277
|
-
* FieldInfos
|
278
|
-
*
|
279
|
-
****************************************************************************/
|
280
|
-
|
281
|
-
FrtFieldInfos *frt_fis_alloc(void) {
|
282
|
-
return FRT_ALLOC(FrtFieldInfos);
|
283
|
-
}
|
284
|
-
|
285
|
-
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, unsigned int bits) {
|
286
|
-
fi_check_params(bits);
|
287
|
-
fis->field_dict = frt_h_new_ptr((frt_free_ft)&frt_fi_deref);
|
288
|
-
fis->size = 0;
|
289
|
-
fis->capa = FIELD_INFOS_INIT_CAPA;
|
290
|
-
fis->fields = FRT_ALLOC_N(FrtFieldInfo *, fis->capa);
|
291
|
-
fis->bits = bits;
|
292
|
-
fis->ref_cnt = 1;
|
293
|
-
fis->rfis = Qnil;
|
294
|
-
return fis;
|
295
|
-
}
|
296
|
-
|
297
|
-
FrtFieldInfos *frt_fis_new(unsigned int bits) {
|
298
|
-
FrtFieldInfos *fis = frt_fis_alloc();
|
299
|
-
return frt_fis_init(fis, bits);
|
300
|
-
}
|
301
|
-
|
302
|
-
FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi) {
|
303
|
-
if (fis->size == fis->capa) {
|
304
|
-
fis->capa <<= 1;
|
305
|
-
FRT_REALLOC_N(fis->fields, FrtFieldInfo *, fis->capa);
|
306
|
-
}
|
307
|
-
if (!frt_h_set_safe(fis->field_dict, (void *)fi->name, fi)) {
|
308
|
-
FRT_RAISE(FRT_ARG_ERROR, "Field :%s already exists", rb_id2name(fi->name));
|
309
|
-
}
|
310
|
-
FRT_REF(fi);
|
311
|
-
fi->number = fis->size;
|
312
|
-
fis->fields[fis->size] = fi;
|
313
|
-
fis->size++;
|
314
|
-
return fi;
|
315
|
-
}
|
316
|
-
|
317
|
-
FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name) {
|
318
|
-
return (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
|
319
|
-
}
|
320
|
-
|
321
|
-
int frt_fis_get_field_num(FrtFieldInfos *fis, ID name) {
|
322
|
-
FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
|
323
|
-
if (fi) { return fi->number; }
|
324
|
-
else { return -1; }
|
325
|
-
}
|
326
|
-
|
327
|
-
FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name) {
|
328
|
-
FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
|
329
|
-
if (!fi) {
|
330
|
-
fi = (FrtFieldInfo*)frt_fi_new(name, fis->bits);
|
331
|
-
frt_fis_add_field(fis, fi);
|
332
|
-
}
|
333
|
-
return fi;
|
334
|
-
}
|
335
|
-
|
336
|
-
FrtFieldInfos *frt_fis_read(FrtInStream *is)
|
337
|
-
{
|
338
|
-
FrtFieldInfos *volatile fis = NULL;
|
339
|
-
char *field_name;
|
340
|
-
FRT_TRY
|
341
|
-
do {
|
342
|
-
volatile int i;
|
343
|
-
union { frt_u32 i; float f; } tmp;
|
344
|
-
FrtFieldInfo *volatile fi;
|
345
|
-
fis = frt_fis_new(frt_is_read_vint(is));
|
346
|
-
for (i = frt_is_read_vint(is); i > 0; i--) {
|
347
|
-
fi = FRT_ALLOC_AND_ZERO(FrtFieldInfo);
|
348
|
-
FRT_TRY
|
349
|
-
field_name = frt_is_read_string_safe(is);
|
350
|
-
fi->name = rb_intern(field_name);
|
351
|
-
free(field_name);
|
352
|
-
tmp.i = frt_is_read_u32(is);
|
353
|
-
fi->boost = tmp.f;
|
354
|
-
fi->bits = frt_is_read_vint(is);
|
355
|
-
FRT_XCATCHALL
|
356
|
-
free(fi);
|
357
|
-
FRT_XENDTRY
|
358
|
-
frt_fis_add_field(fis, fi);
|
359
|
-
fi->ref_cnt = 1;
|
360
|
-
}
|
361
|
-
} while (0);
|
362
|
-
FRT_XCATCHALL
|
363
|
-
frt_fis_deref(fis);
|
364
|
-
FRT_XENDTRY
|
365
|
-
return fis;
|
366
|
-
}
|
367
|
-
|
368
|
-
void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os)
|
369
|
-
{
|
370
|
-
int i;
|
371
|
-
union { frt_u32 i; float f; } tmp;
|
372
|
-
FrtFieldInfo *fi;
|
373
|
-
const int fis_size = fis->size;
|
374
|
-
|
375
|
-
frt_os_write_vint(os, fis->bits);
|
376
|
-
frt_os_write_vint(os, fis->size);
|
377
|
-
|
378
|
-
for (i = 0; i < fis_size; i++) {
|
379
|
-
fi = fis->fields[i];
|
380
|
-
|
381
|
-
frt_os_write_string(os, rb_id2name(fi->name));
|
382
|
-
tmp.f = fi->boost;
|
383
|
-
frt_os_write_u32(os, tmp.i);
|
384
|
-
frt_os_write_vint(os, fi->bits);
|
385
|
-
}
|
386
|
-
}
|
387
|
-
|
388
|
-
static const char *store_str[] = {
|
389
|
-
":no",
|
390
|
-
":yes",
|
391
|
-
"",
|
392
|
-
":compressed"
|
393
|
-
};
|
394
|
-
|
395
|
-
static const char *fi_store_str(FrtFieldInfo *fi)
|
396
|
-
{
|
397
|
-
return store_str[fi->bits & 0x3];
|
398
|
-
}
|
399
|
-
|
400
|
-
static const char *index_str[] = {
|
401
|
-
":no",
|
402
|
-
":untokenized",
|
403
|
-
"",
|
404
|
-
":yes",
|
405
|
-
"",
|
406
|
-
":untokenized_omit_norms",
|
407
|
-
"",
|
408
|
-
":omit_norms"
|
409
|
-
};
|
410
|
-
|
411
|
-
static const char *fi_index_str(FrtFieldInfo *fi)
|
412
|
-
{
|
413
|
-
return index_str[(fi->bits >> 2) & 0x7];
|
414
|
-
}
|
415
|
-
|
416
|
-
static const char *term_vector_str[] = {
|
417
|
-
":no",
|
418
|
-
":yes",
|
419
|
-
"",
|
420
|
-
":with_positions",
|
421
|
-
"",
|
422
|
-
":with_offsets",
|
423
|
-
"",
|
424
|
-
":with_positions_offsets"
|
425
|
-
};
|
426
|
-
|
427
|
-
static const char *fi_term_vector_str(FrtFieldInfo *fi)
|
428
|
-
{
|
429
|
-
return term_vector_str[(fi->bits >> 5) & 0x7];
|
430
|
-
}
|
431
|
-
|
432
|
-
char *frt_fis_to_s(FrtFieldInfos *fis)
|
433
|
-
{
|
434
|
-
int i, pos, capa = 200 + fis->size * 120;
|
435
|
-
char *buf = FRT_ALLOC_N(char, capa);
|
436
|
-
FrtFieldInfo *fi;
|
437
|
-
const int fis_size = fis->size;
|
438
|
-
|
439
|
-
pos = sprintf(buf,
|
440
|
-
"default:\n"
|
441
|
-
" store: %s\n"
|
442
|
-
" index: %s\n"
|
443
|
-
" term_vector: %s\n"
|
444
|
-
"fields:\n",
|
445
|
-
store_str[fis->bits & 0x3],
|
446
|
-
index_str[(fis->bits >> 2) & 0x7],
|
447
|
-
term_vector_str[(fis->bits >> 5) & 0x7]);
|
448
|
-
for (i = 0; i < fis_size; i++) {
|
449
|
-
fi = fis->fields[i];
|
450
|
-
pos += sprintf(buf + pos,
|
451
|
-
" %s:\n"
|
452
|
-
" boost: %f\n"
|
453
|
-
" store: %s\n"
|
454
|
-
" index: %s\n"
|
455
|
-
" term_vector: %s\n",
|
456
|
-
rb_id2name(fi->name), fi->boost, fi_store_str(fi),
|
457
|
-
fi_index_str(fi), fi_term_vector_str(fi));
|
458
|
-
}
|
459
|
-
|
460
|
-
return buf;
|
461
|
-
}
|
462
|
-
|
463
|
-
void frt_fis_deref(FrtFieldInfos *fis) {
|
464
|
-
if (FRT_DEREF(fis) == 0) {
|
465
|
-
frt_h_destroy(fis->field_dict);
|
466
|
-
free(fis->fields);
|
467
|
-
free(fis);
|
468
|
-
}
|
469
|
-
}
|
470
|
-
|
471
|
-
static bool fis_has_vectors(FrtFieldInfos *fis)
|
472
|
-
{
|
473
|
-
int i;
|
474
|
-
const int fis_size = fis->size;
|
475
|
-
|
476
|
-
for (i = 0; i < fis_size; i++) {
|
477
|
-
if (bits_store_term_vector(fis->fields[i]->bits)) {
|
478
|
-
return true;
|
479
|
-
}
|
480
|
-
}
|
481
|
-
return false;
|
482
|
-
}
|
483
|
-
|
484
186
|
/****************************************************************************
|
485
187
|
*
|
486
188
|
* SegmentInfo
|
487
189
|
*
|
488
190
|
****************************************************************************/
|
489
191
|
|
490
|
-
FrtSegmentInfo *frt_si_new(char *name, int doc_cnt, FrtStore *store)
|
491
|
-
{
|
192
|
+
FrtSegmentInfo *frt_si_new(char *name, int doc_cnt, FrtStore *store) {
|
492
193
|
FrtSegmentInfo *si = FRT_ALLOC(FrtSegmentInfo);
|
493
194
|
si->name = name;
|
494
195
|
si->doc_cnt = doc_cnt;
|
@@ -501,8 +202,7 @@ FrtSegmentInfo *frt_si_new(char *name, int doc_cnt, FrtStore *store)
|
|
501
202
|
return si;
|
502
203
|
}
|
503
204
|
|
504
|
-
static FrtSegmentInfo *si_read(FrtStore *store, FrtInStream *is)
|
505
|
-
{
|
205
|
+
static FrtSegmentInfo *si_read(FrtStore *store, FrtInStream *is) {
|
506
206
|
FrtSegmentInfo *volatile si = FRT_ALLOC_AND_ZERO(FrtSegmentInfo);
|
507
207
|
FRT_TRY
|
508
208
|
si->store = store;
|
@@ -527,8 +227,7 @@ static FrtSegmentInfo *si_read(FrtStore *store, FrtInStream *is)
|
|
527
227
|
return si;
|
528
228
|
}
|
529
229
|
|
530
|
-
static void si_write(FrtSegmentInfo *si, FrtOutStream *os)
|
531
|
-
{
|
230
|
+
static void si_write(FrtSegmentInfo *si, FrtOutStream *os) {
|
532
231
|
frt_os_write_string(os, si->name);
|
533
232
|
frt_os_write_vint(os, si->doc_cnt);
|
534
233
|
frt_os_write_vint(os, si->del_gen);
|
@@ -550,13 +249,11 @@ void frt_si_close(FrtSegmentInfo *si) {
|
|
550
249
|
}
|
551
250
|
}
|
552
251
|
|
553
|
-
bool frt_si_has_deletions(FrtSegmentInfo *si)
|
554
|
-
{
|
252
|
+
bool frt_si_has_deletions(FrtSegmentInfo *si) {
|
555
253
|
return si->del_gen >= 0;
|
556
254
|
}
|
557
255
|
|
558
|
-
void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num)
|
559
|
-
{
|
256
|
+
void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num) {
|
560
257
|
if (field_num >= si->norm_gens_size) {
|
561
258
|
int i;
|
562
259
|
FRT_REALLOC_N(si->norm_gens, int, field_num + 1);
|
@@ -568,8 +265,7 @@ void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num)
|
|
568
265
|
si->norm_gens[field_num]++;
|
569
266
|
}
|
570
267
|
|
571
|
-
static char *si_norm_file_name(FrtSegmentInfo *si, char *buf, int field_num)
|
572
|
-
{
|
268
|
+
static char *si_norm_file_name(FrtSegmentInfo *si, char *buf, int field_num) {
|
573
269
|
int norm_gen;
|
574
270
|
if (field_num >= si->norm_gens_size
|
575
271
|
|| 0 > (norm_gen = si->norm_gens[field_num])) {
|
@@ -588,9 +284,7 @@ void frt_deleter_queue_file(FrtDeleter *dlr, const char *file_name);
|
|
588
284
|
*
|
589
285
|
****************************************************************************/
|
590
286
|
|
591
|
-
|
592
|
-
static char *new_segment(frt_i64 generation)
|
593
|
-
{
|
287
|
+
static char *new_segment(frt_i64 generation) {
|
594
288
|
char buf[FRT_SEGMENT_NAME_MAX_LENGTH];
|
595
289
|
char *fn_p = u64_to_str36(buf, FRT_SEGMENT_NAME_MAX_LENGTH - 1,
|
596
290
|
(frt_u64)generation);
|
@@ -611,8 +305,7 @@ typedef struct FindSegmentsFile {
|
|
611
305
|
} ret;
|
612
306
|
} FindSegmentsFile;
|
613
307
|
|
614
|
-
static void which_gen_i(const char *file_name, void *arg)
|
615
|
-
{
|
308
|
+
static void which_gen_i(const char *file_name, void *arg) {
|
616
309
|
frt_i64 *max_generation = (frt_i64 *)arg;
|
617
310
|
if (0 == strncmp(FRT_SEGMENTS_FILE_NAME"_", file_name,
|
618
311
|
sizeof(FRT_SEGMENTS_FILE_NAME))) {
|
@@ -658,10 +351,9 @@ void frt_sis_put(FrtSegmentInfos *sis, FILE *stream) {
|
|
658
351
|
*
|
659
352
|
* @param store - the Store to look in
|
660
353
|
*/
|
661
|
-
frt_i64 frt_sis_current_segment_generation(FrtStore *store)
|
662
|
-
{
|
354
|
+
frt_i64 frt_sis_current_segment_generation(FrtStore *store) {
|
663
355
|
frt_i64 current_generation = -1;
|
664
|
-
store->each(store, &which_gen_i, &current_generation);
|
356
|
+
store->each(store, segm_idx_name, &which_gen_i, &current_generation);
|
665
357
|
return current_generation;
|
666
358
|
}
|
667
359
|
|
@@ -672,8 +364,7 @@ frt_i64 frt_sis_current_segment_generation(FrtStore *store)
|
|
672
364
|
* @param store - the Store to look in
|
673
365
|
* @return segments_N where N is the current generation
|
674
366
|
*/
|
675
|
-
char *frt_sis_curr_seg_file_name(char *buf, FrtStore *store)
|
676
|
-
{
|
367
|
+
char *frt_sis_curr_seg_file_name(char *buf, FrtStore *store) {
|
677
368
|
return segfn_for_generation(buf, frt_sis_current_segment_generation(store));
|
678
369
|
}
|
679
370
|
|
@@ -686,16 +377,14 @@ char *frt_sis_curr_seg_file_name(char *buf, FrtStore *store)
|
|
686
377
|
*/
|
687
378
|
/*
|
688
379
|
FIXME: not used
|
689
|
-
static char *sis_next_seg_file_name(char *buf, FrtStore *store)
|
690
|
-
{
|
380
|
+
static char *sis_next_seg_file_name(char *buf, FrtStore *store) {
|
691
381
|
return segfn_for_generation(buf, frt_sis_current_segment_generation(store) + 1);
|
692
382
|
}
|
693
383
|
*/
|
694
384
|
|
695
385
|
#define GEN_FILE_RETRY_COUNT 10
|
696
386
|
#define GEN_LOOK_AHEAD_COUNT 10
|
697
|
-
static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void (*run)(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir), FrtIndexReader *ir)
|
698
|
-
{
|
387
|
+
static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void (*run)(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir), FrtIndexReader *ir) {
|
699
388
|
volatile int i;
|
700
389
|
volatile int gen_look_ahead_count = 0;
|
701
390
|
volatile bool retry = false;
|
@@ -732,7 +421,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void
|
|
732
421
|
FrtInStream *gen_is;
|
733
422
|
gen_is = NULL;
|
734
423
|
FRT_TRY
|
735
|
-
gen_is = store->open_input(store, SEGMENTS_GEN_FILE_NAME);
|
424
|
+
gen_is = store->open_input(store, segm_idx_name, SEGMENTS_GEN_FILE_NAME);
|
736
425
|
FRT_XCATCHALL
|
737
426
|
FRT_HANDLED();
|
738
427
|
/* TODO:LOG "segments open: FRT_IO_ERROR"*/
|
@@ -782,7 +471,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void
|
|
782
471
|
* this must be a real error. We throw the original exception
|
783
472
|
* we got. */
|
784
473
|
char *listing, listing_buffer[1024];
|
785
|
-
listing =
|
474
|
+
listing = frt_store_folder_to_s(store, segm_idx_name);
|
786
475
|
strncpy(listing_buffer, listing, 1023);
|
787
476
|
listing_buffer[1023] = '\0';
|
788
477
|
free(listing);
|
@@ -815,7 +504,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void
|
|
815
504
|
* and try it if so: */
|
816
505
|
char prev_seg_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
817
506
|
segfn_for_generation(prev_seg_file_name, gen - 1);
|
818
|
-
if (store->exists(store, prev_seg_file_name)) {
|
507
|
+
if (store->exists(store, segm_idx_name, prev_seg_file_name)) {
|
819
508
|
/* TODO:LOG "fallback to prior segment file '" +
|
820
509
|
* prevSegmentFileName + "'" */
|
821
510
|
FRT_TRY
|
@@ -839,8 +528,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void
|
|
839
528
|
}
|
840
529
|
}
|
841
530
|
|
842
|
-
FrtSegmentInfos *frt_sis_new(FrtFieldInfos *fis)
|
843
|
-
{
|
531
|
+
FrtSegmentInfos *frt_sis_new(FrtFieldInfos *fis) {
|
844
532
|
FrtSegmentInfos *sis = FRT_ALLOC_AND_ZERO(FrtSegmentInfos);
|
845
533
|
FRT_REF(fis);
|
846
534
|
sis->fis = fis;
|
@@ -854,13 +542,11 @@ FrtSegmentInfos *frt_sis_new(FrtFieldInfos *fis)
|
|
854
542
|
return sis;
|
855
543
|
}
|
856
544
|
|
857
|
-
FrtSegmentInfo *frt_sis_new_segment(FrtSegmentInfos *sis, int doc_cnt, FrtStore *store)
|
858
|
-
{
|
545
|
+
FrtSegmentInfo *frt_sis_new_segment(FrtSegmentInfos *sis, int doc_cnt, FrtStore *store) {
|
859
546
|
return frt_sis_add_si(sis, frt_si_new(new_segment(sis->counter++), doc_cnt, store));
|
860
547
|
}
|
861
548
|
|
862
|
-
void frt_sis_destroy(FrtSegmentInfos *sis)
|
863
|
-
{
|
549
|
+
void frt_sis_destroy(FrtSegmentInfos *sis) {
|
864
550
|
int i;
|
865
551
|
const int sis_size = sis->size;
|
866
552
|
for (i = 0; i < sis_size; i++) {
|
@@ -872,8 +558,7 @@ void frt_sis_destroy(FrtSegmentInfos *sis)
|
|
872
558
|
free(sis);
|
873
559
|
}
|
874
560
|
|
875
|
-
FrtSegmentInfo *frt_sis_add_si(FrtSegmentInfos *sis, FrtSegmentInfo *si)
|
876
|
-
{
|
561
|
+
FrtSegmentInfo *frt_sis_add_si(FrtSegmentInfos *sis, FrtSegmentInfo *si) {
|
877
562
|
if (sis->size >= sis->capa) {
|
878
563
|
sis->capa <<= 1;
|
879
564
|
FRT_REALLOC_N(sis->segs, FrtSegmentInfo *, sis->capa);
|
@@ -882,8 +567,7 @@ FrtSegmentInfo *frt_sis_add_si(FrtSegmentInfos *sis, FrtSegmentInfo *si)
|
|
882
567
|
return si;
|
883
568
|
}
|
884
569
|
|
885
|
-
void frt_sis_del_at(FrtSegmentInfos *sis, int at)
|
886
|
-
{
|
570
|
+
void frt_sis_del_at(FrtSegmentInfos *sis, int at) {
|
887
571
|
int i;
|
888
572
|
const int sis_size = --(sis->size);
|
889
573
|
frt_si_close(sis->segs[at]);
|
@@ -892,8 +576,7 @@ void frt_sis_del_at(FrtSegmentInfos *sis, int at)
|
|
892
576
|
}
|
893
577
|
}
|
894
578
|
|
895
|
-
void frt_sis_del_from_to(FrtSegmentInfos *sis, int from, int to)
|
896
|
-
{
|
579
|
+
void frt_sis_del_from_to(FrtSegmentInfos *sis, int from, int to) {
|
897
580
|
int i, num_to_del = to - from;
|
898
581
|
const int sis_size = sis->size -= num_to_del;
|
899
582
|
for (i = from; i < to; i++) {
|
@@ -904,8 +587,7 @@ void frt_sis_del_from_to(FrtSegmentInfos *sis, int from, int to)
|
|
904
587
|
}
|
905
588
|
}
|
906
589
|
|
907
|
-
static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_)
|
908
|
-
{
|
590
|
+
static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_) {
|
909
591
|
int seg_cnt;
|
910
592
|
int i;
|
911
593
|
frt_u32 format = 0;
|
@@ -916,7 +598,7 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReade
|
|
916
598
|
segfn_for_generation(seg_file_name, fsf->generation);
|
917
599
|
fsf->ret.sis = NULL;
|
918
600
|
FRT_TRY
|
919
|
-
is = store->open_input(store, seg_file_name);
|
601
|
+
is = store->open_input(store, segm_idx_name, seg_file_name);
|
920
602
|
sis->store = store;
|
921
603
|
FRT_REF(store);
|
922
604
|
sis->generation = fsf->generation;
|
@@ -942,22 +624,20 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReade
|
|
942
624
|
fsf->ret.sis = sis;
|
943
625
|
}
|
944
626
|
|
945
|
-
FrtSegmentInfos *frt_sis_read(FrtStore *store)
|
946
|
-
{
|
627
|
+
FrtSegmentInfos *frt_sis_read(FrtStore *store) {
|
947
628
|
FindSegmentsFile fsf;
|
948
629
|
sis_find_segments_file(store, &fsf, &frt_sis_read_i, NULL);
|
949
630
|
return fsf.ret.sis;
|
950
631
|
}
|
951
632
|
|
952
|
-
void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter)
|
953
|
-
{
|
633
|
+
void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter) {
|
954
634
|
int i;
|
955
635
|
FrtOutStream *volatile os = NULL;
|
956
636
|
const int sis_size = sis->size;
|
957
637
|
char buf[FRT_SEGMENT_NAME_MAX_LENGTH];
|
958
638
|
sis->generation++;
|
959
639
|
FRT_TRY
|
960
|
-
os = store->new_output(store, segfn_for_generation(buf, sis->generation));
|
640
|
+
os = store->new_output(store, segm_idx_name, segfn_for_generation(buf, sis->generation));
|
961
641
|
frt_os_write_u32(os, FORMAT);
|
962
642
|
frt_os_write_u64(os, ++(sis->version)); /* every write changes the index */
|
963
643
|
frt_os_write_u64(os, sis->counter);
|
@@ -971,7 +651,7 @@ void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter)
|
|
971
651
|
FRT_XENDTRY
|
972
652
|
|
973
653
|
FRT_TRY
|
974
|
-
os = store->new_output(store, SEGMENTS_GEN_FILE_NAME);
|
654
|
+
os = store->new_output(store, segm_idx_name, SEGMENTS_GEN_FILE_NAME);
|
975
655
|
frt_os_write_u64(os, sis->generation);
|
976
656
|
frt_os_write_u64(os, sis->generation);
|
977
657
|
FRT_XFINALLY
|
@@ -985,15 +665,14 @@ void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter)
|
|
985
665
|
}
|
986
666
|
}
|
987
667
|
|
988
|
-
static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_)
|
989
|
-
{
|
668
|
+
static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_) {
|
990
669
|
FrtInStream *is;
|
991
670
|
frt_u32 format = 0;
|
992
671
|
frt_u64 version = 0;
|
993
672
|
char seg_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
994
673
|
|
995
674
|
segfn_for_generation(seg_file_name, (frt_u64)fsf->generation);
|
996
|
-
is = store->open_input(store, seg_file_name);
|
675
|
+
is = store->open_input(store, segm_idx_name, seg_file_name);
|
997
676
|
|
998
677
|
FRT_TRY
|
999
678
|
format = frt_is_read_u32(is); // format
|
@@ -1006,8 +685,7 @@ static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexR
|
|
1006
685
|
fsf->ret.uint64 = version;
|
1007
686
|
}
|
1008
687
|
|
1009
|
-
frt_u64 frt_sis_read_current_version(FrtStore *store)
|
1010
|
-
{
|
688
|
+
frt_u64 frt_sis_read_current_version(FrtStore *store) {
|
1011
689
|
FindSegmentsFile fsf;
|
1012
690
|
sis_find_segments_file(store, &fsf, &frt_sis_read_ver_i, NULL);
|
1013
691
|
return fsf.ret.uint64;
|
@@ -1029,9 +707,9 @@ FrtFieldsReader *frt_fr_open(FrtStore *store, const char *segment, FrtFieldInfos
|
|
1029
707
|
fr->fis = fis;
|
1030
708
|
|
1031
709
|
strcpy(file_name + segment_len, ".fdt");
|
1032
|
-
fr->fdt_in = store->open_input(store, file_name);
|
710
|
+
fr->fdt_in = store->open_input(store, segm_idx_name, file_name);
|
1033
711
|
strcpy(file_name + segment_len, ".fdx");
|
1034
|
-
fr->fdx_in = store->open_input(store, file_name);
|
712
|
+
fr->fdx_in = store->open_input(store, segm_idx_name, file_name);
|
1035
713
|
fr->size = frt_is_length(fr->fdx_in) / FIELDS_IDX_PTR_SIZE;
|
1036
714
|
fr->store = store;
|
1037
715
|
FRT_REF(store);
|
@@ -1057,32 +735,30 @@ void frt_fr_close(FrtFieldsReader *fr) {
|
|
1057
735
|
free(fr);
|
1058
736
|
}
|
1059
737
|
|
1060
|
-
static FrtDocField *frt_fr_df_new(ID name, int size, FrtCompressionType
|
738
|
+
static FrtDocField *frt_fr_df_new(ID name, int size, FrtCompressionType compression_type) {
|
1061
739
|
FrtDocField *df = FRT_ALLOC(FrtDocField);
|
1062
740
|
df->name = name;
|
1063
741
|
df->capa = df->size = size;
|
1064
|
-
df->data = FRT_ALLOC_N(char *, df->capa);
|
742
|
+
df->data = FRT_ALLOC_N(const char *, df->capa);
|
1065
743
|
df->lengths = FRT_ALLOC_N(int, df->capa);
|
1066
744
|
df->encodings = FRT_ALLOC_N(rb_encoding *, df->capa);
|
1067
|
-
df->destroy_data = true;
|
1068
745
|
df->boost = 1.0f;
|
1069
|
-
df->
|
746
|
+
df->compression_type = compression_type;
|
1070
747
|
return df;
|
1071
748
|
}
|
1072
749
|
|
1073
|
-
static void frt_fr_read_compressed_fields(FrtFieldsReader *fr, FrtDocField *df, FrtCompressionType
|
750
|
+
static void frt_fr_read_compressed_fields(FrtFieldsReader *fr, FrtDocField *df, FrtCompressionType compression_type) {
|
1074
751
|
int i;
|
1075
752
|
const int df_size = df->size;
|
1076
753
|
FrtInStream *fdt_in = fr->fdt_in;
|
1077
754
|
|
1078
755
|
for (i = 0; i < df_size; i++) {
|
1079
|
-
const int compressed_len = df->lengths[i]
|
1080
|
-
df->data[i] = frt_is_read_compressed_bytes(fdt_in, compressed_len, &(df->lengths[i]),
|
756
|
+
const int compressed_len = df->lengths[i];
|
757
|
+
df->data[i] = frt_is_read_compressed_bytes(fdt_in, compressed_len, &(df->lengths[i]), compression_type);
|
1081
758
|
}
|
1082
759
|
}
|
1083
760
|
|
1084
|
-
FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
|
1085
|
-
{
|
761
|
+
FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num) {
|
1086
762
|
int i, j;
|
1087
763
|
frt_off_t pos;
|
1088
764
|
int stored_cnt;
|
@@ -1098,28 +774,29 @@ FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
|
|
1098
774
|
for (i = 0; i < stored_cnt; i++) {
|
1099
775
|
const int field_num = frt_is_read_vint(fdt_in);
|
1100
776
|
FrtFieldInfo *fi = fr->fis->fields[field_num];
|
1101
|
-
const int
|
1102
|
-
FrtDocField *df = frt_fr_df_new(fi->name,
|
777
|
+
const int df_field_count = frt_is_read_vint(fdt_in);
|
778
|
+
FrtDocField *df = frt_fr_df_new(fi->name, df_field_count, bits_get_compression_type(fi->bits));
|
1103
779
|
|
1104
|
-
for (j = 0; j <
|
780
|
+
for (j = 0; j < df_field_count; j++) {
|
1105
781
|
df->lengths[j] = frt_is_read_vint(fdt_in);
|
1106
782
|
df->encodings[j] = rb_enc_from_index(frt_is_read_vint(fdt_in));
|
1107
|
-
df->
|
783
|
+
df->compression_type = frt_is_read_vint(fdt_in);
|
1108
784
|
}
|
1109
785
|
|
1110
786
|
frt_doc_add_field(doc, df);
|
1111
787
|
}
|
1112
788
|
for (i = 0; i < stored_cnt; i++) {
|
1113
789
|
FrtDocField *df = doc->fields[i];
|
1114
|
-
if (df->
|
1115
|
-
frt_fr_read_compressed_fields(fr, df, df->
|
790
|
+
if (df->compression_type != FRT_COMPRESSION_NONE) {
|
791
|
+
frt_fr_read_compressed_fields(fr, df, df->compression_type);
|
1116
792
|
} else {
|
1117
793
|
const int df_size = df->size;
|
1118
794
|
for (j = 0; j < df_size; j++) {
|
1119
|
-
const int read_len = df->lengths[j]
|
1120
|
-
|
1121
|
-
frt_is_read_bytes(fdt_in, (frt_uchar *)
|
1122
|
-
|
795
|
+
const int read_len = df->lengths[j];
|
796
|
+
char *d = FRT_ALLOC_N(char, read_len + 1);
|
797
|
+
frt_is_read_bytes(fdt_in, (frt_uchar *)d, read_len);
|
798
|
+
d[read_len] = '\0';
|
799
|
+
df->data[j] = d;
|
1123
800
|
}
|
1124
801
|
}
|
1125
802
|
}
|
@@ -1127,8 +804,7 @@ FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
|
|
1127
804
|
return doc;
|
1128
805
|
}
|
1129
806
|
|
1130
|
-
FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num)
|
1131
|
-
{
|
807
|
+
FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num) {
|
1132
808
|
int start = 0;
|
1133
809
|
int i, j;
|
1134
810
|
frt_off_t pos;
|
@@ -1146,18 +822,18 @@ FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num)
|
|
1146
822
|
for (i = 0; i < stored_cnt; i++) {
|
1147
823
|
FrtFieldInfo *fi = fr->fis->fields[frt_is_read_vint(fdt_in)];
|
1148
824
|
const int df_size = frt_is_read_vint(fdt_in);
|
1149
|
-
FrtLazyDocField *lazy_df = frt_lazy_df_new(fi->name, df_size,
|
825
|
+
FrtLazyDocField *lazy_df = frt_lazy_df_new(fi->name, df_size, bits_get_compression_type(fi->bits));
|
1150
826
|
const int field_start = start;
|
1151
827
|
/* get the starts relative positions this time around */
|
1152
828
|
|
1153
829
|
for (j = 0; j < df_size; j++) {
|
1154
830
|
lazy_df->data[j].start = start;
|
1155
|
-
start +=
|
831
|
+
start += (lazy_df->data[j].length = frt_is_read_vint(fdt_in));
|
1156
832
|
lazy_df->data[j].encoding = rb_enc_from_index(frt_is_read_vint(fdt_in));
|
1157
|
-
lazy_df->data[j].
|
833
|
+
lazy_df->data[j].compression_type = frt_is_read_vint(fdt_in);
|
1158
834
|
}
|
1159
835
|
|
1160
|
-
lazy_df->len = start - field_start
|
836
|
+
lazy_df->len = start - field_start;
|
1161
837
|
frt_lazy_doc_add_field(lazy_doc, lazy_df, i);
|
1162
838
|
}
|
1163
839
|
/* correct the starts to their correct absolute positions */
|
@@ -1173,8 +849,7 @@ FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num)
|
|
1173
849
|
return lazy_doc;
|
1174
850
|
}
|
1175
851
|
|
1176
|
-
static FrtTermVector *frt_fr_read_term_vector(FrtFieldsReader *fr, int field_num)
|
1177
|
-
{
|
852
|
+
static FrtTermVector *frt_fr_read_term_vector(FrtFieldsReader *fr, int field_num) {
|
1178
853
|
FrtTermVector *tv = FRT_ALLOC_AND_ZERO(FrtTermVector);
|
1179
854
|
FrtInStream *fdt_in = fr->fdt_in;
|
1180
855
|
FrtFieldInfo *fi = fr->fis->fields[field_num];
|
@@ -1232,8 +907,7 @@ static FrtTermVector *frt_fr_read_term_vector(FrtFieldsReader *fr, int field_num
|
|
1232
907
|
return tv;
|
1233
908
|
}
|
1234
909
|
|
1235
|
-
FrtHash *frt_fr_get_tv(FrtFieldsReader *fr, int doc_num)
|
1236
|
-
{
|
910
|
+
FrtHash *frt_fr_get_tv(FrtFieldsReader *fr, int doc_num) {
|
1237
911
|
FrtHash *term_vectors = frt_h_new_ptr((frt_free_ft)&frt_tv_destroy);
|
1238
912
|
int i;
|
1239
913
|
FrtInStream *fdx_in = fr->fdx_in;
|
@@ -1319,10 +993,10 @@ FrtFieldsWriter *frt_fw_open(FrtStore *store, const char *segment, FrtFieldInfos
|
|
1319
993
|
memcpy(file_name, segment, segment_len);
|
1320
994
|
|
1321
995
|
strcpy(file_name + segment_len, ".fdt");
|
1322
|
-
fw->fdt_out = store->new_output(store, file_name);
|
996
|
+
fw->fdt_out = store->new_output(store, segm_idx_name, file_name);
|
1323
997
|
|
1324
998
|
strcpy(file_name + segment_len, ".fdx");
|
1325
|
-
fw->fdx_out = store->new_output(store, file_name);
|
999
|
+
fw->fdx_out = store->new_output(store, segm_idx_name, file_name);
|
1326
1000
|
|
1327
1001
|
fw->buffer = frt_ram_new_buffer();
|
1328
1002
|
|
@@ -1344,9 +1018,9 @@ void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc) {
|
|
1344
1018
|
int i, j, stored_cnt = 0;
|
1345
1019
|
FrtDocField *df;
|
1346
1020
|
FrtFieldInfo *fi;
|
1347
|
-
FrtCompressionType
|
1021
|
+
FrtCompressionType compression_type;
|
1348
1022
|
FrtOutStream *fdt_out = fw->fdt_out, *fdx_out = fw->fdx_out;
|
1349
|
-
const int doc_size = doc->
|
1023
|
+
const int doc_size = doc->field_count;
|
1350
1024
|
|
1351
1025
|
for (i = 0; i < doc_size; i++) {
|
1352
1026
|
df = doc->fields[i];
|
@@ -1370,23 +1044,20 @@ void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc) {
|
|
1370
1044
|
frt_os_write_vint(fdt_out, df_size);
|
1371
1045
|
|
1372
1046
|
if (bits_is_compressed(fi->bits)) {
|
1373
|
-
|
1047
|
+
compression_type = bits_get_compression_type(fi->bits);
|
1374
1048
|
for (j = 0; j < df_size; j++) {
|
1375
|
-
|
1376
|
-
|
1377
|
-
frt_os_write_vint(fdt_out, compressed_len - 1);
|
1049
|
+
int compressed_len = frt_os_write_compressed_bytes(fw->buffer, (frt_uchar*)df->data[j], df->lengths[j], compression_type);
|
1050
|
+
frt_os_write_vint(fdt_out, compressed_len);
|
1378
1051
|
frt_os_write_vint(fdt_out, rb_enc_to_index(df->encodings[j]));
|
1379
|
-
frt_os_write_vint(fdt_out,
|
1052
|
+
frt_os_write_vint(fdt_out, compression_type);
|
1380
1053
|
}
|
1381
1054
|
} else {
|
1382
1055
|
for (j = 0; j < df_size; j++) {
|
1383
1056
|
const int length = df->lengths[j];
|
1057
|
+
frt_os_write_bytes(fw->buffer, (frt_uchar*)df->data[j], length);
|
1384
1058
|
frt_os_write_vint(fdt_out, length);
|
1385
1059
|
frt_os_write_vint(fdt_out, rb_enc_to_index(df->encodings[j]));
|
1386
|
-
frt_os_write_vint(fdt_out, FRT_COMPRESSION_NONE);
|
1387
|
-
frt_os_write_bytes(fw->buffer, (frt_uchar*)df->data[j], length);
|
1388
|
-
/* leave a space between fields as that is how they are analyzed */
|
1389
|
-
frt_os_write_byte(fw->buffer, ' ');
|
1060
|
+
frt_os_write_vint(fdt_out, FRT_COMPRESSION_NONE);
|
1390
1061
|
}
|
1391
1062
|
}
|
1392
1063
|
}
|
@@ -1413,8 +1084,7 @@ void frt_fw_add_postings(FrtFieldsWriter *fw,
|
|
1413
1084
|
FrtPostingList **plists,
|
1414
1085
|
int posting_count,
|
1415
1086
|
FrtOffset *offsets,
|
1416
|
-
int offset_count)
|
1417
|
-
{
|
1087
|
+
int offset_count) {
|
1418
1088
|
int i, delta_start, delta_length;
|
1419
1089
|
const char *last_term = FRT_EMPTY_STRING;
|
1420
1090
|
FrtOutStream *fdt_out = fw->fdt_out;
|
@@ -1478,19 +1148,16 @@ void frt_fw_add_postings(FrtFieldsWriter *fw,
|
|
1478
1148
|
|
1479
1149
|
#define TE(ste) ((FrtTermEnum *)ste)
|
1480
1150
|
|
1481
|
-
char *frt_te_get_term(FrtTermEnum *te)
|
1482
|
-
{
|
1151
|
+
char *frt_te_get_term(FrtTermEnum *te) {
|
1483
1152
|
return (char *)memcpy(FRT_ALLOC_N(char, te->curr_term_len + 1),
|
1484
1153
|
te->curr_term, te->curr_term_len + 1);
|
1485
1154
|
}
|
1486
1155
|
|
1487
|
-
FrtTermInfo *frt_te_get_ti(FrtTermEnum *te)
|
1488
|
-
{
|
1156
|
+
FrtTermInfo *frt_te_get_ti(FrtTermEnum *te) {
|
1489
1157
|
return (FrtTermInfo*)memcpy(FRT_ALLOC(FrtTermInfo), &(te->curr_ti), sizeof(FrtTermInfo));
|
1490
1158
|
}
|
1491
1159
|
|
1492
|
-
static char *te_skip_to(FrtTermEnum *te, const char *term)
|
1493
|
-
{
|
1160
|
+
static char *te_skip_to(FrtTermEnum *te, const char *term) {
|
1494
1161
|
char *curr_term = te->curr_term;
|
1495
1162
|
if (strcmp(curr_term, term) < 0) {
|
1496
1163
|
while (NULL != ((curr_term = te->next(te)))
|
@@ -1512,8 +1179,7 @@ static char *te_skip_to(FrtTermEnum *te, const char *term)
|
|
1512
1179
|
* SegmentTermIndex
|
1513
1180
|
****************************************************************************/
|
1514
1181
|
|
1515
|
-
static void sti_destroy(FrtSegmentTermIndex *sti)
|
1516
|
-
{
|
1182
|
+
static void sti_destroy(FrtSegmentTermIndex *sti) {
|
1517
1183
|
if (sti->index_terms) {
|
1518
1184
|
int i;
|
1519
1185
|
const int sti_index_cnt = sti->index_cnt;
|
@@ -1557,8 +1223,7 @@ static void sti_ensure_index_is_read(FrtSegmentTermIndex *sti, FrtTermEnum *inde
|
|
1557
1223
|
}
|
1558
1224
|
}
|
1559
1225
|
|
1560
|
-
static int sti_get_index_offset(FrtSegmentTermIndex *sti, const char *term)
|
1561
|
-
{
|
1226
|
+
static int sti_get_index_offset(FrtSegmentTermIndex *sti, const char *term) {
|
1562
1227
|
int lo = 0;
|
1563
1228
|
int hi = sti->index_cnt - 1;
|
1564
1229
|
int mid, delta;
|
@@ -1569,11 +1234,9 @@ static int sti_get_index_offset(FrtSegmentTermIndex *sti, const char *term)
|
|
1569
1234
|
delta = strcmp(term, index_terms[mid]);
|
1570
1235
|
if (delta < 0) {
|
1571
1236
|
hi = mid - 1;
|
1572
|
-
}
|
1573
|
-
else if (delta > 0) {
|
1237
|
+
} else if (delta > 0) {
|
1574
1238
|
lo = mid + 1;
|
1575
|
-
}
|
1576
|
-
else {
|
1239
|
+
} else {
|
1577
1240
|
return mid;
|
1578
1241
|
}
|
1579
1242
|
}
|
@@ -1601,7 +1264,7 @@ FrtSegmentFieldIndex *frt_sfi_open(FrtStore *store, const char *segment) {
|
|
1601
1264
|
pthread_mutex_init(&sfi->mutex, NULL);
|
1602
1265
|
|
1603
1266
|
sprintf(file_name, "%s.tfx", segment);
|
1604
|
-
is = store->open_input(store, file_name);
|
1267
|
+
is = store->open_input(store, segm_idx_name, file_name);
|
1605
1268
|
field_count = (int)frt_is_read_u32(is);
|
1606
1269
|
sfi->index_interval = frt_is_read_vint(is);
|
1607
1270
|
sfi->skip_interval = frt_is_read_vint(is);
|
@@ -1620,7 +1283,7 @@ FrtSegmentFieldIndex *frt_sfi_open(FrtStore *store, const char *segment) {
|
|
1620
1283
|
frt_is_close(is);
|
1621
1284
|
|
1622
1285
|
sprintf(file_name, "%s.tix", segment);
|
1623
|
-
is = store->open_input(store, file_name);
|
1286
|
+
is = store->open_input(store, segm_idx_name, file_name);
|
1624
1287
|
FRT_DEREF(is);
|
1625
1288
|
sfi->index_te = frt_ste_new(is, sfi);
|
1626
1289
|
return sfi;
|
@@ -1750,13 +1413,11 @@ void frt_ste_close(FrtTermEnum *te) {
|
|
1750
1413
|
free(te);
|
1751
1414
|
}
|
1752
1415
|
|
1753
|
-
static char *frt_ste_get_term(FrtTermEnum *te, int pos)
|
1754
|
-
{
|
1416
|
+
static char *frt_ste_get_term(FrtTermEnum *te, int pos) {
|
1755
1417
|
FrtSegmentTermEnum *ste = STE(te);
|
1756
1418
|
if (pos >= ste->size) {
|
1757
1419
|
return NULL;
|
1758
|
-
}
|
1759
|
-
else if (pos != ste->pos) {
|
1420
|
+
} else if (pos != ste->pos) {
|
1760
1421
|
int idx_int = ste->sfi->index_interval;
|
1761
1422
|
if ((pos < ste->pos) || pos > (1 + ste->pos / idx_int) * idx_int) {
|
1762
1423
|
FrtSegmentTermIndex *sti = (FrtSegmentTermIndex *)frt_h_get_int(ste->sfi->field_dict, te->field_num);
|
@@ -1773,8 +1434,7 @@ static char *frt_ste_get_term(FrtTermEnum *te, int pos)
|
|
1773
1434
|
return te->curr_term;
|
1774
1435
|
}
|
1775
1436
|
|
1776
|
-
FrtTermEnum *frt_ste_new(FrtInStream *is, FrtSegmentFieldIndex *sfi)
|
1777
|
-
{
|
1437
|
+
FrtTermEnum *frt_ste_new(FrtInStream *is, FrtSegmentFieldIndex *sfi) {
|
1778
1438
|
FrtSegmentTermEnum *ste = ste_allocate();
|
1779
1439
|
|
1780
1440
|
TE(ste)->field_num = -1;
|
@@ -1794,8 +1454,7 @@ FrtTermEnum *frt_ste_new(FrtInStream *is, FrtSegmentFieldIndex *sfi)
|
|
1794
1454
|
|
1795
1455
|
#define MTE(te) ((MultiTermEnum *)(te))
|
1796
1456
|
|
1797
|
-
typedef struct TermEnumWrapper
|
1798
|
-
{
|
1457
|
+
typedef struct TermEnumWrapper {
|
1799
1458
|
int index;
|
1800
1459
|
FrtTermEnum *te;
|
1801
1460
|
int *doc_map;
|
@@ -1803,8 +1462,7 @@ typedef struct TermEnumWrapper
|
|
1803
1462
|
char *term;
|
1804
1463
|
} TermEnumWrapper;
|
1805
1464
|
|
1806
|
-
typedef struct MultiTermEnum
|
1807
|
-
{
|
1465
|
+
typedef struct MultiTermEnum {
|
1808
1466
|
FrtTermEnum te;
|
1809
1467
|
int doc_freq;
|
1810
1468
|
FrtPriorityQueue *tew_queue;
|
@@ -1816,29 +1474,24 @@ typedef struct MultiTermEnum
|
|
1816
1474
|
int *ti_indexes;
|
1817
1475
|
} MultiTermEnum;
|
1818
1476
|
|
1819
|
-
static bool tew_lt(const TermEnumWrapper *tew1, const TermEnumWrapper *tew2)
|
1820
|
-
{
|
1477
|
+
static bool tew_lt(const TermEnumWrapper *tew1, const TermEnumWrapper *tew2) {
|
1821
1478
|
int cmpres = strcmp(tew1->term, tew2->term);
|
1822
1479
|
if (0 == cmpres) {
|
1823
1480
|
return tew1->index < tew2->index;
|
1824
|
-
}
|
1825
|
-
else {
|
1481
|
+
} else {
|
1826
1482
|
return cmpres < 0;
|
1827
1483
|
}
|
1828
1484
|
}
|
1829
1485
|
|
1830
|
-
static char *tew_next(TermEnumWrapper *tew)
|
1831
|
-
{
|
1486
|
+
static char *tew_next(TermEnumWrapper *tew) {
|
1832
1487
|
return (tew->term = tew->te->next(tew->te));
|
1833
1488
|
}
|
1834
1489
|
|
1835
|
-
static char *tew_skip_to(TermEnumWrapper *tew, const char *term)
|
1836
|
-
{
|
1490
|
+
static char *tew_skip_to(TermEnumWrapper *tew, const char *term) {
|
1837
1491
|
return (tew->term = tew->te->skip_to(tew->te, term));
|
1838
1492
|
}
|
1839
1493
|
|
1840
|
-
static void tew_destroy(TermEnumWrapper *tew)
|
1841
|
-
{
|
1494
|
+
static void tew_destroy(TermEnumWrapper *tew) {
|
1842
1495
|
frt_ir_close(tew->ir);
|
1843
1496
|
if (tew->doc_map) {
|
1844
1497
|
free(tew->doc_map);
|
@@ -1905,8 +1558,7 @@ static FrtTermEnum *mte_set_field(FrtTermEnum *te, int field_num) {
|
|
1905
1558
|
if (tew_next(tew)) {
|
1906
1559
|
frt_pq_push(mte->tew_queue, tew); /* initialize queue */
|
1907
1560
|
}
|
1908
|
-
}
|
1909
|
-
else {
|
1561
|
+
} else {
|
1910
1562
|
sub_te->field_num = -1;
|
1911
1563
|
}
|
1912
1564
|
|
@@ -2009,7 +1661,7 @@ FrtTermInfosReader *frt_tir_open(FrtStore *store, FrtSegmentFieldIndex *sfi, con
|
|
2009
1661
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
2010
1662
|
|
2011
1663
|
sprintf(file_name, "%s.tis", segment);
|
2012
|
-
FrtInStream *is = store->open_input(store, file_name);
|
1664
|
+
FrtInStream *is = store->open_input(store, segm_idx_name, file_name);
|
2013
1665
|
FRT_DEREF(is);
|
2014
1666
|
tir->orig_te = frt_ste_new(is, sfi);
|
2015
1667
|
tir->thread_te = 0;
|
@@ -2088,7 +1740,7 @@ void frt_tir_close(FrtTermInfosReader *tir) {
|
|
2088
1740
|
|
2089
1741
|
static FrtTermWriter *frt_tw_new(FrtStore *store, char *file_name) {
|
2090
1742
|
FrtTermWriter *tw = FRT_ALLOC_AND_ZERO(FrtTermWriter);
|
2091
|
-
tw->os = store->new_output(store, file_name);
|
1743
|
+
tw->os = store->new_output(store, segm_idx_name, file_name);
|
2092
1744
|
tw->last_term = FRT_EMPTY_STRING;
|
2093
1745
|
return tw;
|
2094
1746
|
}
|
@@ -2115,7 +1767,7 @@ FrtTermInfosWriter *frt_tiw_open(FrtStore *store, const char *segment, int index
|
|
2115
1767
|
strcpy(file_name + segment_len, ".tis");
|
2116
1768
|
tiw->tis_writer = frt_tw_new(store, file_name);
|
2117
1769
|
strcpy(file_name + segment_len, ".tfx");
|
2118
|
-
tiw->tfx_out = store->new_output(store, file_name);
|
1770
|
+
tiw->tfx_out = store->new_output(store, segm_idx_name, file_name);
|
2119
1771
|
frt_os_write_u32(tiw->tfx_out, 0); /* make space for field_count */
|
2120
1772
|
|
2121
1773
|
/* The following two numbers are the first numbers written to the field
|
@@ -2483,35 +2135,30 @@ static bool stpe_next(FrtTermDocEnum *tde) {
|
|
2483
2135
|
}
|
2484
2136
|
}
|
2485
2137
|
|
2486
|
-
static int stpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
|
2487
|
-
{
|
2138
|
+
static int stpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num) {
|
2488
2139
|
(void)tde; (void)docs; (void)freqs; (void)req_num;
|
2489
2140
|
FRT_RAISE(FRT_ARG_ERROR, "TermPosEnum does not handle processing multiple documents"
|
2490
2141
|
" in one call. Use TermDocEnum instead.");
|
2491
2142
|
return -1;
|
2492
2143
|
}
|
2493
2144
|
|
2494
|
-
static int stpe_next_position(FrtTermDocEnum *tde)
|
2495
|
-
{
|
2145
|
+
static int stpe_next_position(FrtTermDocEnum *tde) {
|
2496
2146
|
FrtSegmentTermDocEnum *stde = STDE(tde);
|
2497
2147
|
return (stde->prx_cnt-- > 0) ? stde->position += frt_is_read_vint(stde->prx_in)
|
2498
2148
|
: -1;
|
2499
2149
|
}
|
2500
2150
|
|
2501
|
-
static void stpe_close(FrtTermDocEnum *tde)
|
2502
|
-
{
|
2151
|
+
static void stpe_close(FrtTermDocEnum *tde) {
|
2503
2152
|
frt_is_close(STDE(tde)->prx_in);
|
2504
2153
|
STDE(tde)->prx_in = NULL;
|
2505
2154
|
stde_close(tde);
|
2506
2155
|
}
|
2507
2156
|
|
2508
|
-
static void stpe_skip_prox(FrtSegmentTermDocEnum *stde)
|
2509
|
-
{
|
2157
|
+
static void stpe_skip_prox(FrtSegmentTermDocEnum *stde) {
|
2510
2158
|
frt_is_skip_vints(stde->prx_in, stde->freq);
|
2511
2159
|
}
|
2512
2160
|
|
2513
|
-
static void stpe_seek_prox(FrtSegmentTermDocEnum *stde, frt_off_t prx_ptr)
|
2514
|
-
{
|
2161
|
+
static void stpe_seek_prox(FrtSegmentTermDocEnum *stde, frt_off_t prx_ptr) {
|
2515
2162
|
frt_is_seek(stde->prx_in, prx_ptr);
|
2516
2163
|
stde->prx_cnt = 0;
|
2517
2164
|
}
|
@@ -2520,8 +2167,7 @@ FrtTermDocEnum *frt_stpe_new(FrtTermInfosReader *tir,
|
|
2520
2167
|
FrtInStream *frq_in,
|
2521
2168
|
FrtInStream *prx_in,
|
2522
2169
|
FrtBitVector *del_docs,
|
2523
|
-
int skip_interval)
|
2524
|
-
{
|
2170
|
+
int skip_interval) {
|
2525
2171
|
FrtTermDocEnum *tde = frt_stde_new(tir, frq_in, del_docs, skip_interval);
|
2526
2172
|
FrtSegmentTermDocEnum *stde = STDE(tde);
|
2527
2173
|
|
@@ -2550,8 +2196,7 @@ FrtTermDocEnum *frt_stpe_new(FrtTermInfosReader *tir,
|
|
2550
2196
|
|
2551
2197
|
#define MTDE(tde) ((MultiTermDocEnum *)(tde))
|
2552
2198
|
|
2553
|
-
typedef struct MultiTermDocEnum
|
2554
|
-
{
|
2199
|
+
typedef struct MultiTermDocEnum {
|
2555
2200
|
FrtTermDocEnum tde;
|
2556
2201
|
int *starts;
|
2557
2202
|
int base;
|
@@ -2564,16 +2209,14 @@ typedef struct MultiTermDocEnum
|
|
2564
2209
|
FrtTermDocEnum *curr_tde;
|
2565
2210
|
} MultiTermDocEnum;
|
2566
2211
|
|
2567
|
-
static FrtTermDocEnum *mtde_next_tde(MultiTermDocEnum *mtde)
|
2568
|
-
{
|
2212
|
+
static FrtTermDocEnum *mtde_next_tde(MultiTermDocEnum *mtde) {
|
2569
2213
|
mtde->ptr++;
|
2570
2214
|
while (mtde->ptr < mtde->ir_cnt && !mtde->state[mtde->ptr]) {
|
2571
2215
|
mtde->ptr++;
|
2572
2216
|
}
|
2573
2217
|
if (mtde->ptr >= mtde->ir_cnt) {
|
2574
2218
|
return mtde->curr_tde = NULL;
|
2575
|
-
}
|
2576
|
-
else {
|
2219
|
+
} else {
|
2577
2220
|
FrtTermDocEnum *tde = mtde->irs_tde[mtde->ptr];
|
2578
2221
|
mtde->base = mtde->starts[mtde->ptr];
|
2579
2222
|
return mtde->curr_tde = tde;
|
@@ -2587,8 +2230,7 @@ static FrtTermDocEnum *mtde_next_tde(MultiTermDocEnum *mtde)
|
|
2587
2230
|
}\
|
2588
2231
|
} while (0)
|
2589
2232
|
|
2590
|
-
static void mtde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te)
|
2591
|
-
{
|
2233
|
+
static void mtde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te) {
|
2592
2234
|
int i;
|
2593
2235
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2594
2236
|
memset(mtde->state, 0, mtde->ir_cnt);
|
@@ -2598,11 +2240,9 @@ static void mtde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te)
|
|
2598
2240
|
mtde->state[index] = 1;
|
2599
2241
|
if (tde->close == stde_close) {
|
2600
2242
|
stde_seek_ti(STDE(tde), MTE(te)->tis + i);
|
2601
|
-
}
|
2602
|
-
else if (tde->close == stpe_close) {
|
2243
|
+
} else if (tde->close == stpe_close) {
|
2603
2244
|
stpe_seek_ti(STDE(tde), MTE(te)->tis + i);
|
2604
|
-
}
|
2605
|
-
else {
|
2245
|
+
} else {
|
2606
2246
|
tde->seek(tde, MTE(te)->tews[index].te->field_num, te->curr_term);
|
2607
2247
|
}
|
2608
2248
|
}
|
@@ -2611,48 +2251,40 @@ static void mtde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te)
|
|
2611
2251
|
mtde_next_tde(mtde);
|
2612
2252
|
}
|
2613
2253
|
|
2614
|
-
static void mtde_seek(FrtTermDocEnum *tde, int field_num, const char *term)
|
2615
|
-
{
|
2254
|
+
static void mtde_seek(FrtTermDocEnum *tde, int field_num, const char *term) {
|
2616
2255
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2617
2256
|
FrtTermEnum *te = mtde->te;
|
2618
2257
|
char *t;
|
2619
2258
|
te->set_field(te, field_num);
|
2620
2259
|
if (NULL != (t = te->skip_to(te, term)) && 0 == strcmp(term, t)) {
|
2621
2260
|
mtde_seek_te(tde, te);
|
2622
|
-
}
|
2623
|
-
else {
|
2261
|
+
} else {
|
2624
2262
|
memset(mtde->state, 0, mtde->ir_cnt);
|
2625
2263
|
}
|
2626
2264
|
}
|
2627
2265
|
|
2628
|
-
static int mtde_doc_num(FrtTermDocEnum *tde)
|
2629
|
-
{
|
2266
|
+
static int mtde_doc_num(FrtTermDocEnum *tde) {
|
2630
2267
|
CHECK_CURR_TDE("doc_num");
|
2631
2268
|
return MTDE(tde)->base + MTDE(tde)->curr_tde->doc_num(MTDE(tde)->curr_tde);
|
2632
2269
|
}
|
2633
2270
|
|
2634
|
-
static int mtde_freq(FrtTermDocEnum *tde)
|
2635
|
-
{
|
2271
|
+
static int mtde_freq(FrtTermDocEnum *tde) {
|
2636
2272
|
CHECK_CURR_TDE("freq");
|
2637
2273
|
return MTDE(tde)->curr_tde->freq(MTDE(tde)->curr_tde);
|
2638
2274
|
}
|
2639
2275
|
|
2640
|
-
static bool mtde_next(FrtTermDocEnum *tde)
|
2641
|
-
{
|
2276
|
+
static bool mtde_next(FrtTermDocEnum *tde) {
|
2642
2277
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2643
2278
|
if (NULL != mtde->curr_tde && mtde->curr_tde->next(mtde->curr_tde)) {
|
2644
2279
|
return true;
|
2645
|
-
}
|
2646
|
-
else if (mtde_next_tde(mtde)) {
|
2280
|
+
} else if (mtde_next_tde(mtde)) {
|
2647
2281
|
return mtde_next(tde);
|
2648
|
-
}
|
2649
|
-
else {
|
2282
|
+
} else {
|
2650
2283
|
return false;
|
2651
2284
|
}
|
2652
2285
|
}
|
2653
2286
|
|
2654
|
-
static int mtde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
|
2655
|
-
{
|
2287
|
+
static int mtde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num) {
|
2656
2288
|
int i, end = 0, last_end = 0, b;
|
2657
2289
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2658
2290
|
while (true) {
|
@@ -2661,24 +2293,21 @@ static int mtde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
|
|
2661
2293
|
freqs + last_end, req_num - last_end);
|
2662
2294
|
if (end == last_end) { /* none left in segment */
|
2663
2295
|
if (!mtde_next_tde(mtde)) return end;
|
2664
|
-
}
|
2665
|
-
else { /* got some */
|
2296
|
+
} else { /* got some */
|
2666
2297
|
b = mtde->base; /* adjust doc numbers */
|
2667
2298
|
for (i = last_end; i < end; i++) {
|
2668
2299
|
docs[i] += b;
|
2669
2300
|
}
|
2670
2301
|
if (end == req_num) {
|
2671
2302
|
return end;
|
2672
|
-
}
|
2673
|
-
else {
|
2303
|
+
} else {
|
2674
2304
|
last_end = end;
|
2675
2305
|
}
|
2676
2306
|
}
|
2677
2307
|
}
|
2678
2308
|
}
|
2679
2309
|
|
2680
|
-
static bool mtde_skip_to(FrtTermDocEnum *tde, int target_doc_num)
|
2681
|
-
{
|
2310
|
+
static bool mtde_skip_to(FrtTermDocEnum *tde, int target_doc_num) {
|
2682
2311
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2683
2312
|
FrtTermDocEnum *curr_tde;
|
2684
2313
|
while (NULL != (curr_tde = mtde->curr_tde)) {
|
@@ -2692,8 +2321,7 @@ static bool mtde_skip_to(FrtTermDocEnum *tde, int target_doc_num)
|
|
2692
2321
|
return false;
|
2693
2322
|
}
|
2694
2323
|
|
2695
|
-
static void mtde_close(FrtTermDocEnum *tde)
|
2696
|
-
{
|
2324
|
+
static void mtde_close(FrtTermDocEnum *tde) {
|
2697
2325
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2698
2326
|
FrtTermDocEnum *tmp_tde;
|
2699
2327
|
int i = mtde->ir_cnt;
|
@@ -2708,8 +2336,7 @@ static void mtde_close(FrtTermDocEnum *tde)
|
|
2708
2336
|
free(tde);
|
2709
2337
|
}
|
2710
2338
|
|
2711
|
-
static FrtTermDocEnum *mtxe_new(FrtMultiReader *mr)
|
2712
|
-
{
|
2339
|
+
static FrtTermDocEnum *mtxe_new(FrtMultiReader *mr) {
|
2713
2340
|
MultiTermDocEnum *mtde = FRT_ALLOC_AND_ZERO(MultiTermDocEnum);
|
2714
2341
|
FrtTermDocEnum *tde = TDE(mtde);
|
2715
2342
|
tde->seek = &mtde_seek;
|
@@ -2731,8 +2358,7 @@ static FrtTermDocEnum *mtxe_new(FrtMultiReader *mr)
|
|
2731
2358
|
return tde;
|
2732
2359
|
}
|
2733
2360
|
|
2734
|
-
static FrtTermDocEnum *mtde_new(FrtMultiReader *mr)
|
2735
|
-
{
|
2361
|
+
static FrtTermDocEnum *mtde_new(FrtMultiReader *mr) {
|
2736
2362
|
int i;
|
2737
2363
|
FrtTermDocEnum *tde = mtxe_new(mr);
|
2738
2364
|
tde->next_position = NULL;
|
@@ -2747,14 +2373,12 @@ static FrtTermDocEnum *mtde_new(FrtMultiReader *mr)
|
|
2747
2373
|
* MultiTermPosEnum
|
2748
2374
|
****************************************************************************/
|
2749
2375
|
|
2750
|
-
static int mtpe_next_position(FrtTermDocEnum *tde)
|
2751
|
-
{
|
2376
|
+
static int mtpe_next_position(FrtTermDocEnum *tde) {
|
2752
2377
|
CHECK_CURR_TDE("next_position");
|
2753
2378
|
return MTDE(tde)->curr_tde->next_position(MTDE(tde)->curr_tde);
|
2754
2379
|
}
|
2755
2380
|
|
2756
|
-
static FrtTermDocEnum *mtpe_new(FrtMultiReader *mr)
|
2757
|
-
{
|
2381
|
+
static FrtTermDocEnum *mtpe_new(FrtMultiReader *mr) {
|
2758
2382
|
int i;
|
2759
2383
|
FrtTermDocEnum *tde = mtxe_new(mr);
|
2760
2384
|
tde->next_position = &mtpe_next_position;
|
@@ -2774,8 +2398,7 @@ static FrtTermDocEnum *mtpe_new(FrtMultiReader *mr)
|
|
2774
2398
|
#define MTDPE(tde) ((MultipleTermDocPosEnum *)(tde))
|
2775
2399
|
#define MTDPE_POS_QUEUE_INIT_CAPA 8
|
2776
2400
|
|
2777
|
-
typedef struct
|
2778
|
-
{
|
2401
|
+
typedef struct {
|
2779
2402
|
FrtTermDocEnum tde;
|
2780
2403
|
int doc_num;
|
2781
2404
|
int freq;
|
@@ -2790,8 +2413,7 @@ static void tde_destroy(FrtTermDocEnum *tde) {
|
|
2790
2413
|
tde->close(tde);
|
2791
2414
|
}
|
2792
2415
|
|
2793
|
-
static void mtdpe_seek(FrtTermDocEnum *tde, int field_num, const char *term)
|
2794
|
-
{
|
2416
|
+
static void mtdpe_seek(FrtTermDocEnum *tde, int field_num, const char *term) {
|
2795
2417
|
(void)tde;
|
2796
2418
|
(void)field_num;
|
2797
2419
|
(void)term;
|
@@ -2799,18 +2421,15 @@ static void mtdpe_seek(FrtTermDocEnum *tde, int field_num, const char *term)
|
|
2799
2421
|
" the #seek operation");
|
2800
2422
|
}
|
2801
2423
|
|
2802
|
-
static int mtdpe_doc_num(FrtTermDocEnum *tde)
|
2803
|
-
{
|
2424
|
+
static int mtdpe_doc_num(FrtTermDocEnum *tde) {
|
2804
2425
|
return MTDPE(tde)->doc_num;
|
2805
2426
|
}
|
2806
2427
|
|
2807
|
-
static int mtdpe_freq(FrtTermDocEnum *tde)
|
2808
|
-
{
|
2428
|
+
static int mtdpe_freq(FrtTermDocEnum *tde) {
|
2809
2429
|
return MTDPE(tde)->freq;
|
2810
2430
|
}
|
2811
2431
|
|
2812
|
-
static bool mtdpe_next(FrtTermDocEnum *tde)
|
2813
|
-
{
|
2432
|
+
static bool mtdpe_next(FrtTermDocEnum *tde) {
|
2814
2433
|
FrtTermDocEnum *sub_tde;
|
2815
2434
|
int pos = 0, freq = 0;
|
2816
2435
|
int doc;
|
@@ -2839,8 +2458,7 @@ static bool mtdpe_next(FrtTermDocEnum *tde)
|
|
2839
2458
|
|
2840
2459
|
if (sub_tde->next(sub_tde)) {
|
2841
2460
|
frt_pq_down(mtdpe->pq);
|
2842
|
-
}
|
2843
|
-
else {
|
2461
|
+
} else {
|
2844
2462
|
sub_tde = (FrtTermDocEnum *)frt_pq_pop(mtdpe->pq);
|
2845
2463
|
sub_tde->close(sub_tde);
|
2846
2464
|
}
|
@@ -2856,13 +2474,11 @@ static bool mtdpe_next(FrtTermDocEnum *tde)
|
|
2856
2474
|
return true;
|
2857
2475
|
}
|
2858
2476
|
|
2859
|
-
static bool tdpe_less_than(FrtTermDocEnum *p1, FrtTermDocEnum *p2)
|
2860
|
-
{
|
2477
|
+
static bool tdpe_less_than(FrtTermDocEnum *p1, FrtTermDocEnum *p2) {
|
2861
2478
|
return p1->doc_num(p1) < p2->doc_num(p2);
|
2862
2479
|
}
|
2863
2480
|
|
2864
|
-
static bool mtdpe_skip_to(FrtTermDocEnum *tde, int target_doc_num)
|
2865
|
-
{
|
2481
|
+
static bool mtdpe_skip_to(FrtTermDocEnum *tde, int target_doc_num) {
|
2866
2482
|
FrtTermDocEnum *sub_tde;
|
2867
2483
|
FrtPriorityQueue *mtdpe_pq = MTDPE(tde)->pq;
|
2868
2484
|
|
@@ -2870,8 +2486,7 @@ static bool mtdpe_skip_to(FrtTermDocEnum *tde, int target_doc_num)
|
|
2870
2486
|
&& (target_doc_num > sub_tde->doc_num(sub_tde))) {
|
2871
2487
|
if (sub_tde->skip_to(sub_tde, target_doc_num)) {
|
2872
2488
|
frt_pq_down(mtdpe_pq);
|
2873
|
-
}
|
2874
|
-
else {
|
2489
|
+
} else {
|
2875
2490
|
sub_tde = (FrtTermDocEnum *)frt_pq_pop(mtdpe_pq);
|
2876
2491
|
sub_tde->close(sub_tde);
|
2877
2492
|
}
|
@@ -2879,8 +2494,7 @@ static bool mtdpe_skip_to(FrtTermDocEnum *tde, int target_doc_num)
|
|
2879
2494
|
return tde->next(tde);
|
2880
2495
|
}
|
2881
2496
|
|
2882
|
-
static int mtdpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
|
2883
|
-
{
|
2497
|
+
static int mtdpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num) {
|
2884
2498
|
(void)tde;
|
2885
2499
|
(void)docs;
|
2886
2500
|
(void)freqs;
|
@@ -2889,21 +2503,18 @@ static int mtdpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
|
|
2889
2503
|
return req_num;
|
2890
2504
|
}
|
2891
2505
|
|
2892
|
-
static int mtdpe_next_position(FrtTermDocEnum *tde)
|
2893
|
-
{
|
2506
|
+
static int mtdpe_next_position(FrtTermDocEnum *tde) {
|
2894
2507
|
return MTDPE(tde)->pos_queue[MTDPE(tde)->pos_queue_index++];
|
2895
2508
|
}
|
2896
2509
|
|
2897
|
-
static void mtdpe_close(FrtTermDocEnum *tde)
|
2898
|
-
{
|
2510
|
+
static void mtdpe_close(FrtTermDocEnum *tde) {
|
2899
2511
|
frt_pq_clear(MTDPE(tde)->pq);
|
2900
2512
|
frt_pq_destroy(MTDPE(tde)->pq);
|
2901
2513
|
free(MTDPE(tde)->pos_queue);
|
2902
2514
|
free(tde);
|
2903
2515
|
}
|
2904
2516
|
|
2905
|
-
FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms, int t_cnt)
|
2906
|
-
{
|
2517
|
+
FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms, int t_cnt) {
|
2907
2518
|
int i;
|
2908
2519
|
MultipleTermDocPosEnum *mtdpe = FRT_ALLOC_AND_ZERO(MultipleTermDocPosEnum);
|
2909
2520
|
FrtTermDocEnum *tde = TDE(mtdpe);
|
@@ -2918,8 +2529,7 @@ FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms, i
|
|
2918
2529
|
tpe->seek(tpe, field_num, terms[i]);
|
2919
2530
|
if (tpe->next(tpe)) {
|
2920
2531
|
frt_pq_push(pq, tpe);
|
2921
|
-
}
|
2922
|
-
else {
|
2532
|
+
} else {
|
2923
2533
|
tpe->close(tpe);
|
2924
2534
|
}
|
2925
2535
|
}
|
@@ -2952,26 +2562,22 @@ static void file_name_filter_init(void) {
|
|
2952
2562
|
frt_register_for_cleanup(fn_extensions, (frt_free_ft)&frt_h_destroy);
|
2953
2563
|
}
|
2954
2564
|
|
2955
|
-
bool frt_file_name_filter_is_index_file(const char *file_name, bool include_locks)
|
2956
|
-
{
|
2565
|
+
bool frt_file_name_filter_is_index_file(const char *file_name, bool include_locks) {
|
2957
2566
|
char *p = strrchr(file_name, '.');
|
2958
2567
|
if (NULL == fn_extensions) file_name_filter_init();
|
2959
2568
|
if (NULL != p) {
|
2960
2569
|
char *extension = p + 1;
|
2961
2570
|
if (NULL != frt_h_get(fn_extensions, extension)) {
|
2962
2571
|
return true;
|
2963
|
-
}
|
2964
|
-
else if ((*extension == 'f' || *extension == 's')
|
2572
|
+
} else if ((*extension == 'f' || *extension == 's')
|
2965
2573
|
&& *(extension + 1) >= '0'
|
2966
2574
|
&& *(extension + 1) <= '9') {
|
2967
2575
|
return true;
|
2968
|
-
}
|
2969
|
-
else if (include_locks && (strcmp(extension, "lck") == 0)
|
2576
|
+
} else if (include_locks && (strcmp(extension, "lck") == 0)
|
2970
2577
|
&& (strncmp(file_name, "ferret", 6) == 0)) {
|
2971
2578
|
return true;
|
2972
2579
|
}
|
2973
|
-
}
|
2974
|
-
else if (0 == strncmp(FRT_SEGMENTS_FILE_NAME, file_name,
|
2580
|
+
} else if (0 == strncmp(FRT_SEGMENTS_FILE_NAME, file_name,
|
2975
2581
|
sizeof(FRT_SEGMENTS_FILE_NAME) - 1)) {
|
2976
2582
|
return true;
|
2977
2583
|
}
|
@@ -3007,8 +2613,8 @@ void frt_deleter_queue_file(FrtDeleter *dlr, const char *file_name) {
|
|
3007
2613
|
void frt_deleter_delete_file(FrtDeleter *dlr, char *file_name) {
|
3008
2614
|
FrtStore *store = dlr->store;
|
3009
2615
|
FRT_TRY
|
3010
|
-
if (store->exists(store, file_name)) {
|
3011
|
-
store->remove(store, file_name);
|
2616
|
+
if (store->exists(store, segm_idx_name, file_name)) {
|
2617
|
+
store->remove(store, segm_idx_name, file_name);
|
3012
2618
|
}
|
3013
2619
|
frt_hs_del(dlr->pending, file_name);
|
3014
2620
|
FRT_XCATCHALL
|
@@ -3123,20 +2729,18 @@ void frt_deleter_find_deletable_files(FrtDeleter *dlr) {
|
|
3123
2729
|
* info: */
|
3124
2730
|
frt_sis_curr_seg_file_name(dfa.curr_seg_file_name, store);
|
3125
2731
|
|
3126
|
-
store->each(store, &frt_deleter_find_deletable_files_i, &dfa);
|
2732
|
+
store->each(store, segm_idx_name, &frt_deleter_find_deletable_files_i, &dfa);
|
3127
2733
|
frt_h_destroy(dfa.current);
|
3128
2734
|
}
|
3129
2735
|
|
3130
|
-
static void deleter_delete_deletable_files(FrtDeleter *dlr)
|
3131
|
-
{
|
2736
|
+
static void deleter_delete_deletable_files(FrtDeleter *dlr) {
|
3132
2737
|
frt_deleter_find_deletable_files(dlr);
|
3133
2738
|
deleter_commit_pending_deletions(dlr);
|
3134
2739
|
}
|
3135
2740
|
|
3136
2741
|
/*
|
3137
2742
|
TODO: currently not used. Why not?
|
3138
|
-
static void deleter_clear_pending_deletions(FrtDeleter *dlr)
|
3139
|
-
{
|
2743
|
+
static void deleter_clear_pending_deletions(FrtDeleter *dlr) {
|
3140
2744
|
frt_hs_clear(dlr->pending);
|
3141
2745
|
}
|
3142
2746
|
*/
|
@@ -3147,14 +2751,12 @@ static void deleter_clear_pending_deletions(FrtDeleter *dlr)
|
|
3147
2751
|
*
|
3148
2752
|
****************************************************************************/
|
3149
2753
|
|
3150
|
-
static void ir_acquire_not_necessary(FrtIndexReader *ir)
|
3151
|
-
{
|
2754
|
+
static void ir_acquire_not_necessary(FrtIndexReader *ir) {
|
3152
2755
|
(void)ir;
|
3153
2756
|
}
|
3154
2757
|
|
3155
2758
|
#define I64_PFX POSH_I64_PRINTF_PREFIX
|
3156
|
-
static void ir_acquire_write_lock(FrtIndexReader *ir)
|
3157
|
-
{
|
2759
|
+
static void ir_acquire_write_lock(FrtIndexReader *ir) {
|
3158
2760
|
if (ir->is_stale) {
|
3159
2761
|
FRT_RAISE(FRT_STATE_ERROR, "IndexReader out of date and no longer valid for "
|
3160
2762
|
"delete, undelete, or set_norm operations. To "
|
@@ -3163,7 +2765,7 @@ static void ir_acquire_write_lock(FrtIndexReader *ir)
|
|
3163
2765
|
}
|
3164
2766
|
|
3165
2767
|
if (NULL == ir->write_lock) {
|
3166
|
-
ir->write_lock = frt_open_lock(ir->store, FRT_WRITE_LOCK_NAME);
|
2768
|
+
ir->write_lock = frt_open_lock(ir->store, segm_idx_name, FRT_WRITE_LOCK_NAME);
|
3167
2769
|
if (!ir->write_lock->obtain(ir->write_lock)) {/* obtain write lock */
|
3168
2770
|
FRT_RAISE(FRT_LOCK_ERROR, "Could not obtain write lock when trying to "
|
3169
2771
|
"write changes to the index. Check that there "
|
@@ -3209,8 +2811,7 @@ static FrtIndexReader *ir_setup(FrtIndexReader *ir, FrtStore *store, FrtSegmentI
|
|
3209
2811
|
ir->is_owner = is_owner;
|
3210
2812
|
if (is_owner) {
|
3211
2813
|
ir->acquire_write_lock = &ir_acquire_write_lock;
|
3212
|
-
}
|
3213
|
-
else {
|
2814
|
+
} else {
|
3214
2815
|
ir->acquire_write_lock = &ir_acquire_not_necessary;
|
3215
2816
|
}
|
3216
2817
|
|
@@ -3221,8 +2822,7 @@ int frt_ir_doc_freq(FrtIndexReader *ir, ID field, const char *term) {
|
|
3221
2822
|
int field_num = frt_fis_get_field_num(ir->fis, field);
|
3222
2823
|
if (field_num >= 0) {
|
3223
2824
|
return ir->doc_freq(ir, field_num, term);
|
3224
|
-
}
|
3225
|
-
else {
|
2825
|
+
} else {
|
3226
2826
|
return 0;
|
3227
2827
|
}
|
3228
2828
|
}
|
@@ -3242,8 +2842,7 @@ void frt_ir_set_norm(FrtIndexReader *ir, int doc_num, ID field, frt_uchar val) {
|
|
3242
2842
|
}
|
3243
2843
|
}
|
3244
2844
|
|
3245
|
-
frt_uchar *frt_ir_get_norms_i(FrtIndexReader *ir, int field_num)
|
3246
|
-
{
|
2845
|
+
frt_uchar *frt_ir_get_norms_i(FrtIndexReader *ir, int field_num) {
|
3247
2846
|
frt_uchar *norms = NULL;
|
3248
2847
|
if (field_num >= 0) {
|
3249
2848
|
norms = ir->get_norms(ir, field_num);
|
@@ -3266,15 +2865,13 @@ frt_uchar *frt_ir_get_norms_into(FrtIndexReader *ir, ID field, frt_uchar *buf) {
|
|
3266
2865
|
int field_num = frt_fis_get_field_num(ir->fis, field);
|
3267
2866
|
if (field_num >= 0) {
|
3268
2867
|
ir->get_norms_into(ir, field_num, buf);
|
3269
|
-
}
|
3270
|
-
else {
|
2868
|
+
} else {
|
3271
2869
|
memset(buf, 0, ir->max_doc(ir));
|
3272
2870
|
}
|
3273
2871
|
return buf;
|
3274
2872
|
}
|
3275
2873
|
|
3276
|
-
void frt_ir_undelete_all(FrtIndexReader *ir)
|
3277
|
-
{
|
2874
|
+
void frt_ir_undelete_all(FrtIndexReader *ir) {
|
3278
2875
|
pthread_mutex_lock(&ir->mutex);
|
3279
2876
|
ir->acquire_write_lock(ir);
|
3280
2877
|
ir->undelete_all_i(ir);
|
@@ -3282,8 +2879,7 @@ void frt_ir_undelete_all(FrtIndexReader *ir)
|
|
3282
2879
|
pthread_mutex_unlock(&ir->mutex);
|
3283
2880
|
}
|
3284
2881
|
|
3285
|
-
void frt_ir_delete_doc(FrtIndexReader *ir, int doc_num)
|
3286
|
-
{
|
2882
|
+
void frt_ir_delete_doc(FrtIndexReader *ir, int doc_num) {
|
3287
2883
|
if (doc_num >= 0 && doc_num < ir->max_doc(ir)) {
|
3288
2884
|
pthread_mutex_lock(&ir->mutex);
|
3289
2885
|
ir->acquire_write_lock(ir);
|
@@ -3342,8 +2938,7 @@ FrtTermDocEnum *frt_ir_term_positions_for(FrtIndexReader *ir, ID field, const ch
|
|
3342
2938
|
return tde;
|
3343
2939
|
}
|
3344
2940
|
|
3345
|
-
static void ir_commit_i(FrtIndexReader *ir)
|
3346
|
-
{
|
2941
|
+
static void ir_commit_i(FrtIndexReader *ir) {
|
3347
2942
|
if (ir->has_changes) {
|
3348
2943
|
if (NULL == ir->deleter && NULL != ir->store) {
|
3349
2944
|
/* In the MultiReader case, we share this deleter across all
|
@@ -3369,16 +2964,14 @@ static void ir_commit_i(FrtIndexReader *ir)
|
|
3369
2964
|
frt_close_lock(ir->write_lock);
|
3370
2965
|
ir->write_lock = NULL;
|
3371
2966
|
}
|
3372
|
-
}
|
3373
|
-
else {
|
2967
|
+
} else {
|
3374
2968
|
ir->commit_i(ir);
|
3375
2969
|
}
|
3376
2970
|
}
|
3377
2971
|
ir->has_changes = false;
|
3378
2972
|
}
|
3379
2973
|
|
3380
|
-
void frt_ir_commit(FrtIndexReader *ir)
|
3381
|
-
{
|
2974
|
+
void frt_ir_commit(FrtIndexReader *ir) {
|
3382
2975
|
pthread_mutex_lock(&ir->mutex);
|
3383
2976
|
ir_commit_i(ir);
|
3384
2977
|
pthread_mutex_unlock(&ir->mutex);
|
@@ -3411,15 +3004,13 @@ void frt_ir_close(FrtIndexReader *ir) {
|
|
3411
3004
|
/**
|
3412
3005
|
* Don't call this method if the cache already exists
|
3413
3006
|
**/
|
3414
|
-
void frt_ir_add_cache(FrtIndexReader *ir)
|
3415
|
-
{
|
3007
|
+
void frt_ir_add_cache(FrtIndexReader *ir) {
|
3416
3008
|
if (NULL == ir->cache) {
|
3417
3009
|
ir->cache = frt_co_hash_create();
|
3418
3010
|
}
|
3419
3011
|
}
|
3420
3012
|
|
3421
|
-
bool frt_ir_is_latest(FrtIndexReader *ir)
|
3422
|
-
{
|
3013
|
+
bool frt_ir_is_latest(FrtIndexReader *ir) {
|
3423
3014
|
return ir->is_latest_i(ir);
|
3424
3015
|
}
|
3425
3016
|
|
@@ -3434,8 +3025,7 @@ typedef struct Norm {
|
|
3434
3025
|
bool is_dirty : 1;
|
3435
3026
|
} Norm;
|
3436
3027
|
|
3437
|
-
static Norm *norm_create(FrtInStream *is, int field_num)
|
3438
|
-
{
|
3028
|
+
static Norm *norm_create(FrtInStream *is, int field_num) {
|
3439
3029
|
Norm *norm = FRT_ALLOC(Norm);
|
3440
3030
|
|
3441
3031
|
norm->is = is;
|
@@ -3447,8 +3037,7 @@ static Norm *norm_create(FrtInStream *is, int field_num)
|
|
3447
3037
|
return norm;
|
3448
3038
|
}
|
3449
3039
|
|
3450
|
-
static void norm_destroy(Norm *norm)
|
3451
|
-
{
|
3040
|
+
static void norm_destroy(Norm *norm) {
|
3452
3041
|
frt_is_close(norm->is);
|
3453
3042
|
if (NULL != norm->bytes) {
|
3454
3043
|
free(norm->bytes);
|
@@ -3457,8 +3046,7 @@ static void norm_destroy(Norm *norm)
|
|
3457
3046
|
}
|
3458
3047
|
|
3459
3048
|
static void norm_rewrite(Norm *norm, FrtStore *store, FrtDeleter *dlr,
|
3460
|
-
FrtSegmentInfo *si, int doc_count)
|
3461
|
-
{
|
3049
|
+
FrtSegmentInfo *si, int doc_count) {
|
3462
3050
|
FrtOutStream *os;
|
3463
3051
|
char norm_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
3464
3052
|
const int field_num = norm->field_num;
|
@@ -3468,7 +3056,7 @@ static void norm_rewrite(Norm *norm, FrtStore *store, FrtDeleter *dlr,
|
|
3468
3056
|
}
|
3469
3057
|
frt_si_advance_norm_gen(si, field_num);
|
3470
3058
|
si_norm_file_name(si, norm_file_name, field_num);
|
3471
|
-
os = store->new_output(store, norm_file_name);
|
3059
|
+
os = store->new_output(store, segm_idx_name, norm_file_name);
|
3472
3060
|
frt_os_write_bytes(os, norm->bytes, doc_count);
|
3473
3061
|
frt_os_close(os);
|
3474
3062
|
norm->is_dirty = false;
|
@@ -3492,8 +3080,7 @@ static FrtFieldsReader *sr_fr(FrtSegmentReader *sr) {
|
|
3492
3080
|
return fr;
|
3493
3081
|
}
|
3494
3082
|
|
3495
|
-
static bool sr_is_deleted_i(FrtSegmentReader *sr, int doc_num)
|
3496
|
-
{
|
3083
|
+
static bool sr_is_deleted_i(FrtSegmentReader *sr, int doc_num) {
|
3497
3084
|
return (NULL != sr->deleted_docs && frt_bv_get(sr->deleted_docs, doc_num));
|
3498
3085
|
}
|
3499
3086
|
|
@@ -3561,10 +3148,9 @@ static void sr_set_deleter_i(FrtIndexReader *ir, FrtDeleter *deleter) {
|
|
3561
3148
|
ir->deleter = deleter;
|
3562
3149
|
}
|
3563
3150
|
|
3564
|
-
static void bv_write(FrtBitVector *bv, FrtStore *store, char *name)
|
3565
|
-
{
|
3151
|
+
static void bv_write(FrtBitVector *bv, FrtStore *store, char *name) {
|
3566
3152
|
int i;
|
3567
|
-
FrtOutStream *os = store->new_output(store, name);
|
3153
|
+
FrtOutStream *os = store->new_output(store, segm_idx_name, name);
|
3568
3154
|
frt_os_write_vint(os, bv->size);
|
3569
3155
|
for (i = ((bv->size-1) >> 5); i >= 0; i--) {
|
3570
3156
|
frt_os_write_u32(os, bv->bits[i]);
|
@@ -3572,11 +3158,10 @@ static void bv_write(FrtBitVector *bv, FrtStore *store, char *name)
|
|
3572
3158
|
frt_os_close(os);
|
3573
3159
|
}
|
3574
3160
|
|
3575
|
-
static FrtBitVector *bv_read(FrtStore *store, char *name)
|
3576
|
-
{
|
3161
|
+
static FrtBitVector *bv_read(FrtStore *store, char *name) {
|
3577
3162
|
int i;
|
3578
3163
|
volatile bool success = false;
|
3579
|
-
FrtInStream *volatile is = store->open_input(store, name);
|
3164
|
+
FrtInStream *volatile is = store->open_input(store, segm_idx_name, name);
|
3580
3165
|
FrtBitVector *volatile bv = FRT_ALLOC_AND_ZERO(FrtBitVector);
|
3581
3166
|
bv->size = (int)frt_is_read_vint(is);
|
3582
3167
|
bv->capa = (bv->size >> 5) + 1;
|
@@ -3595,13 +3180,11 @@ static FrtBitVector *bv_read(FrtStore *store, char *name)
|
|
3595
3180
|
return bv;
|
3596
3181
|
}
|
3597
3182
|
|
3598
|
-
static bool sr_is_latest_i(FrtIndexReader *ir)
|
3599
|
-
{
|
3183
|
+
static bool sr_is_latest_i(FrtIndexReader *ir) {
|
3600
3184
|
return (frt_sis_read_current_version(ir->store) == ir->sis->version);
|
3601
3185
|
}
|
3602
3186
|
|
3603
|
-
static void sr_commit_i(FrtIndexReader *ir)
|
3604
|
-
{
|
3187
|
+
static void sr_commit_i(FrtIndexReader *ir) {
|
3605
3188
|
FrtSegmentInfo *si = SR(ir)->si;
|
3606
3189
|
char *segment = SR(ir)->si->name;
|
3607
3190
|
char tmp_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
@@ -3614,8 +3197,7 @@ static void sr_commit_i(FrtIndexReader *ir)
|
|
3614
3197
|
if (SR(ir)->undelete_all) {
|
3615
3198
|
si->del_gen = -1;
|
3616
3199
|
SR(ir)->undelete_all = false;
|
3617
|
-
}
|
3618
|
-
else {
|
3200
|
+
} else {
|
3619
3201
|
/* (SR(ir)->deleted_docs_dirty) re-write deleted */
|
3620
3202
|
si->del_gen++;
|
3621
3203
|
frt_fn_for_generation(tmp_file_name, segment, "del", si->del_gen);
|
@@ -3658,8 +3240,7 @@ static void sr_close_i(FrtIndexReader *ir) {
|
|
3658
3240
|
}
|
3659
3241
|
}
|
3660
3242
|
|
3661
|
-
static int sr_num_docs(FrtIndexReader *ir)
|
3662
|
-
{
|
3243
|
+
static int sr_num_docs(FrtIndexReader *ir) {
|
3663
3244
|
int num_docs;
|
3664
3245
|
|
3665
3246
|
pthread_mutex_lock(&ir->mutex);
|
@@ -3671,13 +3252,11 @@ static int sr_num_docs(FrtIndexReader *ir)
|
|
3671
3252
|
return num_docs;
|
3672
3253
|
}
|
3673
3254
|
|
3674
|
-
static int sr_max_doc(FrtIndexReader *ir)
|
3675
|
-
{
|
3255
|
+
static int sr_max_doc(FrtIndexReader *ir) {
|
3676
3256
|
return SR(ir)->fr->size;
|
3677
3257
|
}
|
3678
3258
|
|
3679
|
-
static FrtDocument *sr_get_doc(FrtIndexReader *ir, int doc_num)
|
3680
|
-
{
|
3259
|
+
static FrtDocument *sr_get_doc(FrtIndexReader *ir, int doc_num) {
|
3681
3260
|
FrtDocument *doc;
|
3682
3261
|
pthread_mutex_lock(&ir->mutex);
|
3683
3262
|
if (sr_is_deleted_i(SR(ir), doc_num)) {
|
@@ -3689,8 +3268,7 @@ static FrtDocument *sr_get_doc(FrtIndexReader *ir, int doc_num)
|
|
3689
3268
|
return doc;
|
3690
3269
|
}
|
3691
3270
|
|
3692
|
-
static FrtLazyDoc *sr_get_lazy_doc(FrtIndexReader *ir, int doc_num)
|
3693
|
-
{
|
3271
|
+
static FrtLazyDoc *sr_get_lazy_doc(FrtIndexReader *ir, int doc_num) {
|
3694
3272
|
FrtLazyDoc *lazy_doc;
|
3695
3273
|
pthread_mutex_lock(&ir->mutex);
|
3696
3274
|
if (sr_is_deleted_i(SR(ir), doc_num)) {
|
@@ -3702,8 +3280,7 @@ static FrtLazyDoc *sr_get_lazy_doc(FrtIndexReader *ir, int doc_num)
|
|
3702
3280
|
return lazy_doc;
|
3703
3281
|
}
|
3704
3282
|
|
3705
|
-
static frt_uchar *sr_get_norms(FrtIndexReader *ir, int field_num)
|
3706
|
-
{
|
3283
|
+
static frt_uchar *sr_get_norms(FrtIndexReader *ir, int field_num) {
|
3707
3284
|
frt_uchar *norms;
|
3708
3285
|
pthread_mutex_lock(&ir->mutex);
|
3709
3286
|
norms = sr_get_norms_i(SR(ir), field_num);
|
@@ -3712,23 +3289,20 @@ static frt_uchar *sr_get_norms(FrtIndexReader *ir, int field_num)
|
|
3712
3289
|
}
|
3713
3290
|
|
3714
3291
|
static frt_uchar *sr_get_norms_into(FrtIndexReader *ir, int field_num,
|
3715
|
-
frt_uchar *buf)
|
3716
|
-
{
|
3292
|
+
frt_uchar *buf) {
|
3717
3293
|
pthread_mutex_lock(&ir->mutex);
|
3718
3294
|
sr_get_norms_into_i(SR(ir), field_num, buf);
|
3719
3295
|
pthread_mutex_unlock(&ir->mutex);
|
3720
3296
|
return buf;
|
3721
3297
|
}
|
3722
3298
|
|
3723
|
-
static FrtTermEnum *sr_terms(FrtIndexReader *ir, int field_num)
|
3724
|
-
{
|
3299
|
+
static FrtTermEnum *sr_terms(FrtIndexReader *ir, int field_num) {
|
3725
3300
|
FrtTermEnum *te = SR(ir)->tir->orig_te;
|
3726
3301
|
te = frt_ste_clone(te);
|
3727
3302
|
return ste_set_field(te, field_num);
|
3728
3303
|
}
|
3729
3304
|
|
3730
|
-
static FrtTermEnum *sr_terms_from(FrtIndexReader *ir, int field_num, const char *term)
|
3731
|
-
{
|
3305
|
+
static FrtTermEnum *sr_terms_from(FrtIndexReader *ir, int field_num, const char *term) {
|
3732
3306
|
FrtTermEnum *te = SR(ir)->tir->orig_te;
|
3733
3307
|
te = frt_ste_clone(te);
|
3734
3308
|
ste_set_field(te, field_num);
|
@@ -3736,20 +3310,17 @@ static FrtTermEnum *sr_terms_from(FrtIndexReader *ir, int field_num, const char
|
|
3736
3310
|
return te;
|
3737
3311
|
}
|
3738
3312
|
|
3739
|
-
static int sr_doc_freq(FrtIndexReader *ir, int field_num, const char *term)
|
3740
|
-
{
|
3313
|
+
static int sr_doc_freq(FrtIndexReader *ir, int field_num, const char *term) {
|
3741
3314
|
FrtTermInfo *ti = frt_tir_get_ti(frt_tir_set_field(SR(ir)->tir, field_num), term);
|
3742
3315
|
return ti ? ti->doc_freq : 0;
|
3743
3316
|
}
|
3744
3317
|
|
3745
|
-
static FrtTermDocEnum *sr_term_docs(FrtIndexReader *ir)
|
3746
|
-
{
|
3318
|
+
static FrtTermDocEnum *sr_term_docs(FrtIndexReader *ir) {
|
3747
3319
|
return frt_stde_new(SR(ir)->tir, SR(ir)->frq_in, SR(ir)->deleted_docs,
|
3748
3320
|
STE(SR(ir)->tir->orig_te)->skip_interval);
|
3749
3321
|
}
|
3750
3322
|
|
3751
|
-
static FrtTermDocEnum *sr_term_positions(FrtIndexReader *ir)
|
3752
|
-
{
|
3323
|
+
static FrtTermDocEnum *sr_term_positions(FrtIndexReader *ir) {
|
3753
3324
|
FrtSegmentReader *sr = SR(ir);
|
3754
3325
|
return frt_stpe_new(sr->tir, sr->frq_in, sr->prx_in, sr->deleted_docs,
|
3755
3326
|
STE(sr->tir->orig_te)->skip_interval);
|
@@ -3766,8 +3337,7 @@ static FrtTermVector *sr_term_vector(FrtIndexReader *ir, int doc_num, ID field)
|
|
3766
3337
|
return frt_fr_get_field_tv(fr, doc_num, fi->number);
|
3767
3338
|
}
|
3768
3339
|
|
3769
|
-
static FrtHash *sr_term_vectors(FrtIndexReader *ir, int doc_num)
|
3770
|
-
{
|
3340
|
+
static FrtHash *sr_term_vectors(FrtIndexReader *ir, int doc_num) {
|
3771
3341
|
FrtFieldsReader *fr;
|
3772
3342
|
if (!SR(ir)->fr || NULL == (fr = sr_fr(SR(ir)))) {
|
3773
3343
|
return NULL;
|
@@ -3776,8 +3346,7 @@ static FrtHash *sr_term_vectors(FrtIndexReader *ir, int doc_num)
|
|
3776
3346
|
return frt_fr_get_tv(fr, doc_num);
|
3777
3347
|
}
|
3778
3348
|
|
3779
|
-
static bool sr_is_deleted(FrtIndexReader *ir, int doc_num)
|
3780
|
-
{
|
3349
|
+
static bool sr_is_deleted(FrtIndexReader *ir, int doc_num) {
|
3781
3350
|
bool is_del;
|
3782
3351
|
|
3783
3352
|
pthread_mutex_lock(&ir->mutex);
|
@@ -3787,13 +3356,11 @@ static bool sr_is_deleted(FrtIndexReader *ir, int doc_num)
|
|
3787
3356
|
return is_del;
|
3788
3357
|
}
|
3789
3358
|
|
3790
|
-
static bool sr_has_deletions(FrtIndexReader *ir)
|
3791
|
-
{
|
3359
|
+
static bool sr_has_deletions(FrtIndexReader *ir) {
|
3792
3360
|
return NULL != SR(ir)->deleted_docs;
|
3793
3361
|
}
|
3794
3362
|
|
3795
|
-
static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store)
|
3796
|
-
{
|
3363
|
+
static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store) {
|
3797
3364
|
int i;
|
3798
3365
|
FrtSegmentInfo *si = SR(ir)->si;
|
3799
3366
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
@@ -3801,7 +3368,7 @@ static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store)
|
|
3801
3368
|
for (i = si->norm_gens_size - 1; i >= 0; i--) {
|
3802
3369
|
FrtStore *store = ir->store;
|
3803
3370
|
if (si_norm_file_name(si, file_name, i)) {
|
3804
|
-
FrtInStream *is = store->open_input(store, file_name);
|
3371
|
+
FrtInStream *is = store->open_input(store, segm_idx_name, file_name);
|
3805
3372
|
FRT_DEREF(is);
|
3806
3373
|
frt_h_set_int(SR(ir)->norms, i, norm_create(is, i));
|
3807
3374
|
}
|
@@ -3809,8 +3376,7 @@ static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store)
|
|
3809
3376
|
SR(ir)->norms_dirty = false;
|
3810
3377
|
}
|
3811
3378
|
|
3812
|
-
static FrtIndexReader *sr_setup_i(FrtSegmentReader *sr)
|
3813
|
-
{
|
3379
|
+
static FrtIndexReader *sr_setup_i(FrtSegmentReader *sr) {
|
3814
3380
|
FrtStore *volatile store = sr->si->store;
|
3815
3381
|
FrtIndexReader *ir = IR(sr);
|
3816
3382
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
@@ -3859,12 +3425,12 @@ static FrtIndexReader *sr_setup_i(FrtSegmentReader *sr)
|
|
3859
3425
|
}
|
3860
3426
|
|
3861
3427
|
sprintf(file_name, "%s.frq", sr_segment);
|
3862
|
-
sr->frq_in = store->open_input(store, file_name);
|
3428
|
+
sr->frq_in = store->open_input(store, segm_idx_name, file_name);
|
3863
3429
|
sprintf(file_name, "%s.prx", sr_segment);
|
3864
|
-
sr->prx_in = store->open_input(store, file_name);
|
3430
|
+
sr->prx_in = store->open_input(store, segm_idx_name, file_name);
|
3865
3431
|
sr->norms = frt_h_new_int((frt_free_ft)&norm_destroy);
|
3866
3432
|
sr_open_norms(ir, store);
|
3867
|
-
if (
|
3433
|
+
if (frt_fis_has_vectors(ir->fis)) {
|
3868
3434
|
frb_thread_key_create(&sr->thread_fr, NULL);
|
3869
3435
|
sr->fr_bucket = frt_ary_new();
|
3870
3436
|
}
|
@@ -3894,8 +3460,7 @@ static FrtIndexReader *sr_open(FrtSegmentInfos *sis, FrtFieldInfos *fis, int si_
|
|
3894
3460
|
|
3895
3461
|
#define MR(ir) ((FrtMultiReader *)(ir))
|
3896
3462
|
|
3897
|
-
static int mr_reader_index_i(FrtMultiReader *mr, int doc_num)
|
3898
|
-
{
|
3463
|
+
static int mr_reader_index_i(FrtMultiReader *mr, int doc_num) {
|
3899
3464
|
int lo = 0; /* search @starts array */
|
3900
3465
|
int hi = mr->r_cnt - 1; /* for first element less */
|
3901
3466
|
int mid;
|
@@ -3906,11 +3471,9 @@ static int mr_reader_index_i(FrtMultiReader *mr, int doc_num)
|
|
3906
3471
|
mid_value = mr->starts[mid];
|
3907
3472
|
if (doc_num < mid_value) {
|
3908
3473
|
hi = mid - 1;
|
3909
|
-
}
|
3910
|
-
else if (doc_num > mid_value) {
|
3474
|
+
} else if (doc_num > mid_value) {
|
3911
3475
|
lo = mid + 1;
|
3912
|
-
}
|
3913
|
-
else { /* found a match */
|
3476
|
+
} else { /* found a match */
|
3914
3477
|
while ((mid+1 < mr->r_cnt) && (mr->starts[mid+1] == mid_value)) {
|
3915
3478
|
mid += 1; /* scan to last match in case we have empty segments */
|
3916
3479
|
}
|
@@ -3920,8 +3483,7 @@ static int mr_reader_index_i(FrtMultiReader *mr, int doc_num)
|
|
3920
3483
|
return hi;
|
3921
3484
|
}
|
3922
3485
|
|
3923
|
-
static int mr_num_docs(FrtIndexReader *ir)
|
3924
|
-
{
|
3486
|
+
static int mr_num_docs(FrtIndexReader *ir) {
|
3925
3487
|
int i, num_docs;
|
3926
3488
|
pthread_mutex_lock(&ir->mutex);
|
3927
3489
|
if (MR(ir)->num_docs_cache == -1) {
|
@@ -3938,8 +3500,7 @@ static int mr_num_docs(FrtIndexReader *ir)
|
|
3938
3500
|
return num_docs;
|
3939
3501
|
}
|
3940
3502
|
|
3941
|
-
static int mr_max_doc(FrtIndexReader *ir)
|
3942
|
-
{
|
3503
|
+
static int mr_max_doc(FrtIndexReader *ir) {
|
3943
3504
|
return MR(ir)->max_doc;
|
3944
3505
|
}
|
3945
3506
|
|
@@ -3947,30 +3508,25 @@ static int mr_max_doc(FrtIndexReader *ir)
|
|
3947
3508
|
int i = mr_reader_index_i(MR(ir), doc_num);\
|
3948
3509
|
FrtIndexReader *reader = MR(ir)->sub_readers[i]
|
3949
3510
|
|
3950
|
-
static FrtDocument *mr_get_doc(FrtIndexReader *ir, int doc_num)
|
3951
|
-
{
|
3511
|
+
static FrtDocument *mr_get_doc(FrtIndexReader *ir, int doc_num) {
|
3952
3512
|
GET_READER();
|
3953
3513
|
return reader->get_doc(reader, doc_num - MR(ir)->starts[i]);
|
3954
3514
|
}
|
3955
3515
|
|
3956
|
-
static FrtLazyDoc *mr_get_lazy_doc(FrtIndexReader *ir, int doc_num)
|
3957
|
-
{
|
3516
|
+
static FrtLazyDoc *mr_get_lazy_doc(FrtIndexReader *ir, int doc_num) {
|
3958
3517
|
GET_READER();
|
3959
3518
|
return reader->get_lazy_doc(reader, doc_num - MR(ir)->starts[i]);
|
3960
3519
|
}
|
3961
3520
|
|
3962
|
-
int frt_mr_get_field_num(FrtMultiReader *mr, int ir_num, int f_num)
|
3963
|
-
{
|
3521
|
+
int frt_mr_get_field_num(FrtMultiReader *mr, int ir_num, int f_num) {
|
3964
3522
|
if (mr->field_num_map) {
|
3965
3523
|
return mr->field_num_map[ir_num][f_num];
|
3966
|
-
}
|
3967
|
-
else {
|
3524
|
+
} else {
|
3968
3525
|
return f_num;
|
3969
3526
|
}
|
3970
3527
|
}
|
3971
3528
|
|
3972
|
-
static frt_uchar *mr_get_norms(FrtIndexReader *ir, int field_num)
|
3973
|
-
{
|
3529
|
+
static frt_uchar *mr_get_norms(FrtIndexReader *ir, int field_num) {
|
3974
3530
|
frt_uchar *bytes;
|
3975
3531
|
|
3976
3532
|
pthread_mutex_lock(&ir->mutex);
|
@@ -3995,16 +3551,14 @@ static frt_uchar *mr_get_norms(FrtIndexReader *ir, int field_num)
|
|
3995
3551
|
return bytes;
|
3996
3552
|
}
|
3997
3553
|
|
3998
|
-
static frt_uchar *mr_get_norms_into(FrtIndexReader *ir, int field_num, frt_uchar *buf)
|
3999
|
-
{
|
3554
|
+
static frt_uchar *mr_get_norms_into(FrtIndexReader *ir, int field_num, frt_uchar *buf) {
|
4000
3555
|
frt_uchar *bytes;
|
4001
3556
|
|
4002
3557
|
pthread_mutex_lock(&ir->mutex);
|
4003
3558
|
bytes = (frt_uchar *)frt_h_get_int(MR(ir)->norms_cache, field_num);
|
4004
3559
|
if (NULL != bytes) {
|
4005
3560
|
memcpy(buf, bytes, MR(ir)->max_doc);
|
4006
|
-
}
|
4007
|
-
else {
|
3561
|
+
} else {
|
4008
3562
|
int i;
|
4009
3563
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
4010
3564
|
for (i = 0; i < mr_reader_cnt; i++) {
|
@@ -4019,18 +3573,15 @@ static frt_uchar *mr_get_norms_into(FrtIndexReader *ir, int field_num, frt_uchar
|
|
4019
3573
|
return buf;
|
4020
3574
|
}
|
4021
3575
|
|
4022
|
-
static FrtTermEnum *mr_terms(FrtIndexReader *ir, int field_num)
|
4023
|
-
{
|
3576
|
+
static FrtTermEnum *mr_terms(FrtIndexReader *ir, int field_num) {
|
4024
3577
|
return frt_mte_new(MR(ir), field_num, NULL);
|
4025
3578
|
}
|
4026
3579
|
|
4027
|
-
static FrtTermEnum *mr_terms_from(FrtIndexReader *ir, int field_num, const char *term)
|
4028
|
-
{
|
3580
|
+
static FrtTermEnum *mr_terms_from(FrtIndexReader *ir, int field_num, const char *term) {
|
4029
3581
|
return frt_mte_new(MR(ir), field_num, term);
|
4030
3582
|
}
|
4031
3583
|
|
4032
|
-
static int mr_doc_freq(FrtIndexReader *ir, int field_num, const char *t)
|
4033
|
-
{
|
3584
|
+
static int mr_doc_freq(FrtIndexReader *ir, int field_num, const char *t) {
|
4034
3585
|
int total = 0; /* sum freqs in segments */
|
4035
3586
|
int i = MR(ir)->r_cnt;
|
4036
3587
|
for (i = MR(ir)->r_cnt - 1; i >= 0; i--) {
|
@@ -4043,13 +3594,11 @@ static int mr_doc_freq(FrtIndexReader *ir, int field_num, const char *t)
|
|
4043
3594
|
return total;
|
4044
3595
|
}
|
4045
3596
|
|
4046
|
-
static FrtTermDocEnum *mr_term_docs(FrtIndexReader *ir)
|
4047
|
-
{
|
3597
|
+
static FrtTermDocEnum *mr_term_docs(FrtIndexReader *ir) {
|
4048
3598
|
return mtde_new(MR(ir));
|
4049
3599
|
}
|
4050
3600
|
|
4051
|
-
static FrtTermDocEnum *mr_term_positions(FrtIndexReader *ir)
|
4052
|
-
{
|
3601
|
+
static FrtTermDocEnum *mr_term_positions(FrtIndexReader *ir) {
|
4053
3602
|
return mtpe_new(MR(ir));
|
4054
3603
|
}
|
4055
3604
|
|
@@ -4058,25 +3607,21 @@ static FrtTermVector *mr_term_vector(FrtIndexReader *ir, int doc_num, ID field)
|
|
4058
3607
|
return reader->term_vector(reader, doc_num - MR(ir)->starts[i], field);
|
4059
3608
|
}
|
4060
3609
|
|
4061
|
-
static FrtHash *mr_term_vectors(FrtIndexReader *ir, int doc_num)
|
4062
|
-
{
|
3610
|
+
static FrtHash *mr_term_vectors(FrtIndexReader *ir, int doc_num) {
|
4063
3611
|
GET_READER();
|
4064
3612
|
return reader->term_vectors(reader, doc_num - MR(ir)->starts[i]);
|
4065
3613
|
}
|
4066
3614
|
|
4067
|
-
static bool mr_is_deleted(FrtIndexReader *ir, int doc_num)
|
4068
|
-
{
|
3615
|
+
static bool mr_is_deleted(FrtIndexReader *ir, int doc_num) {
|
4069
3616
|
GET_READER();
|
4070
3617
|
return reader->is_deleted(reader, doc_num - MR(ir)->starts[i]);
|
4071
3618
|
}
|
4072
3619
|
|
4073
|
-
static bool mr_has_deletions(FrtIndexReader *ir)
|
4074
|
-
{
|
3620
|
+
static bool mr_has_deletions(FrtIndexReader *ir) {
|
4075
3621
|
return MR(ir)->has_deletions;
|
4076
3622
|
}
|
4077
3623
|
|
4078
|
-
static void mr_set_norm_i(FrtIndexReader *ir, int doc_num, int field_num, frt_uchar val)
|
4079
|
-
{
|
3624
|
+
static void mr_set_norm_i(FrtIndexReader *ir, int doc_num, int field_num, frt_uchar val) {
|
4080
3625
|
int i = mr_reader_index_i(MR(ir), doc_num);
|
4081
3626
|
int fnum = frt_mr_get_field_num(MR(ir), i, field_num);
|
4082
3627
|
if (fnum >= 0) {
|
@@ -4087,8 +3632,7 @@ static void mr_set_norm_i(FrtIndexReader *ir, int doc_num, int field_num, frt_uc
|
|
4087
3632
|
}
|
4088
3633
|
}
|
4089
3634
|
|
4090
|
-
static void mr_delete_doc_i(FrtIndexReader *ir, int doc_num)
|
4091
|
-
{
|
3635
|
+
static void mr_delete_doc_i(FrtIndexReader *ir, int doc_num) {
|
4092
3636
|
GET_READER();
|
4093
3637
|
MR(ir)->num_docs_cache = -1; /* invalidate cache */
|
4094
3638
|
|
@@ -4098,8 +3642,7 @@ static void mr_delete_doc_i(FrtIndexReader *ir, int doc_num)
|
|
4098
3642
|
ir->has_changes = true;
|
4099
3643
|
}
|
4100
3644
|
|
4101
|
-
static void mr_undelete_all_i(FrtIndexReader *ir)
|
4102
|
-
{
|
3645
|
+
static void mr_undelete_all_i(FrtIndexReader *ir) {
|
4103
3646
|
int i;
|
4104
3647
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
4105
3648
|
|
@@ -4112,8 +3655,7 @@ static void mr_undelete_all_i(FrtIndexReader *ir)
|
|
4112
3655
|
ir->has_changes = true;
|
4113
3656
|
}
|
4114
3657
|
|
4115
|
-
static void mr_set_deleter_i(FrtIndexReader *ir, FrtDeleter *deleter)
|
4116
|
-
{
|
3658
|
+
static void mr_set_deleter_i(FrtIndexReader *ir, FrtDeleter *deleter) {
|
4117
3659
|
int i;
|
4118
3660
|
ir->deleter = deleter;
|
4119
3661
|
for (i = MR(ir)->r_cnt - 1; i >= 0; i--) {
|
@@ -4122,8 +3664,7 @@ static void mr_set_deleter_i(FrtIndexReader *ir, FrtDeleter *deleter)
|
|
4122
3664
|
}
|
4123
3665
|
}
|
4124
3666
|
|
4125
|
-
static bool mr_is_latest_i(FrtIndexReader *ir)
|
4126
|
-
{
|
3667
|
+
static bool mr_is_latest_i(FrtIndexReader *ir) {
|
4127
3668
|
int i;
|
4128
3669
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
4129
3670
|
for (i = 0; i < mr_reader_cnt; i++) {
|
@@ -4134,8 +3675,7 @@ static bool mr_is_latest_i(FrtIndexReader *ir)
|
|
4134
3675
|
return true;
|
4135
3676
|
}
|
4136
3677
|
|
4137
|
-
static void mr_commit_i(FrtIndexReader *ir)
|
4138
|
-
{
|
3678
|
+
static void mr_commit_i(FrtIndexReader *ir) {
|
4139
3679
|
int i;
|
4140
3680
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
4141
3681
|
for (i = 0; i < mr_reader_cnt; i++) {
|
@@ -4144,8 +3684,7 @@ static void mr_commit_i(FrtIndexReader *ir)
|
|
4144
3684
|
}
|
4145
3685
|
}
|
4146
3686
|
|
4147
|
-
static void mr_close_i(FrtIndexReader *ir)
|
4148
|
-
{
|
3687
|
+
static void mr_close_i(FrtIndexReader *ir) {
|
4149
3688
|
int i;
|
4150
3689
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
4151
3690
|
for (i = 0; i < mr_reader_cnt; i++) {
|
@@ -4351,8 +3890,7 @@ FrtIndexReader *frt_ir_open(FrtIndexReader *ir, FrtStore *store) {
|
|
4351
3890
|
*
|
4352
3891
|
****************************************************************************/
|
4353
3892
|
|
4354
|
-
static FrtOccurence *occ_new(FrtMemoryPool *mp, int pos)
|
4355
|
-
{
|
3893
|
+
static FrtOccurence *occ_new(FrtMemoryPool *mp, int pos) {
|
4356
3894
|
FrtOccurence *occ = FRT_MP_ALLOC(mp, FrtOccurence);
|
4357
3895
|
occ->pos = pos;
|
4358
3896
|
occ->next = NULL;
|
@@ -4365,8 +3903,7 @@ static FrtOccurence *occ_new(FrtMemoryPool *mp, int pos)
|
|
4365
3903
|
*
|
4366
3904
|
****************************************************************************/
|
4367
3905
|
|
4368
|
-
FrtPosting *frt_p_new(FrtMemoryPool *mp, int doc_num, int pos)
|
4369
|
-
{
|
3906
|
+
FrtPosting *frt_p_new(FrtMemoryPool *mp, int doc_num, int pos) {
|
4370
3907
|
FrtPosting *p = FRT_MP_ALLOC(mp, FrtPosting);
|
4371
3908
|
p->doc_num = doc_num;
|
4372
3909
|
p->first_occ = occ_new(mp, pos);
|
@@ -4382,8 +3919,7 @@ FrtPosting *frt_p_new(FrtMemoryPool *mp, int doc_num, int pos)
|
|
4382
3919
|
****************************************************************************/
|
4383
3920
|
|
4384
3921
|
FrtPostingList *frt_pl_new(FrtMemoryPool *mp, const char *term,
|
4385
|
-
int term_len, FrtPosting *p)
|
4386
|
-
{
|
3922
|
+
int term_len, FrtPosting *p) {
|
4387
3923
|
// TODO account for term_len as measured in the original text vs utf8 term_len of term
|
4388
3924
|
FrtPostingList *pl = FRT_MP_ALLOC(mp, FrtPostingList);
|
4389
3925
|
pl->term = (char *)frt_mp_memdup(mp, term, term_len + 1);
|
@@ -4393,20 +3929,17 @@ FrtPostingList *frt_pl_new(FrtMemoryPool *mp, const char *term,
|
|
4393
3929
|
return pl;
|
4394
3930
|
}
|
4395
3931
|
|
4396
|
-
void frt_pl_add_occ(FrtMemoryPool *mp, FrtPostingList *pl, int pos)
|
4397
|
-
{
|
3932
|
+
void frt_pl_add_occ(FrtMemoryPool *mp, FrtPostingList *pl, int pos) {
|
4398
3933
|
pl->last_occ = pl->last_occ->next = occ_new(mp, pos);
|
4399
3934
|
pl->last->freq++;
|
4400
3935
|
}
|
4401
3936
|
|
4402
|
-
static void pl_add_posting(FrtPostingList *pl, FrtPosting *p)
|
4403
|
-
{
|
3937
|
+
static void pl_add_posting(FrtPostingList *pl, FrtPosting *p) {
|
4404
3938
|
pl->last = pl->last->next = p;
|
4405
3939
|
pl->last_occ = p->first_occ;
|
4406
3940
|
}
|
4407
3941
|
|
4408
|
-
int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2)
|
4409
|
-
{
|
3942
|
+
int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2) {
|
4410
3943
|
return strcmp((*pl1)->term, (*pl2)->term);
|
4411
3944
|
}
|
4412
3945
|
|
@@ -4416,8 +3949,7 @@ int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2)
|
|
4416
3949
|
*
|
4417
3950
|
****************************************************************************/
|
4418
3951
|
|
4419
|
-
static FrtFieldInverter *fld_inv_new(FrtDocWriter *dw, FrtFieldInfo *fi)
|
4420
|
-
{
|
3952
|
+
static FrtFieldInverter *fld_inv_new(FrtDocWriter *dw, FrtFieldInfo *fi) {
|
4421
3953
|
FrtFieldInverter *fld_inv = FRT_MP_ALLOC(dw->mp, FrtFieldInverter);
|
4422
3954
|
fld_inv->is_tokenized = bits_is_tokenized(fi->bits);
|
4423
3955
|
fld_inv->store_term_vector = bits_store_term_vector(fi->bits);
|
@@ -4434,8 +3966,7 @@ static FrtFieldInverter *fld_inv_new(FrtDocWriter *dw, FrtFieldInfo *fi)
|
|
4434
3966
|
return fld_inv;
|
4435
3967
|
}
|
4436
3968
|
|
4437
|
-
static void fld_inv_destroy(FrtFieldInverter *fld_inv)
|
4438
|
-
{
|
3969
|
+
static void fld_inv_destroy(FrtFieldInverter *fld_inv) {
|
4439
3970
|
frt_h_destroy(fld_inv->plists);
|
4440
3971
|
}
|
4441
3972
|
|
@@ -4445,8 +3976,7 @@ static void fld_inv_destroy(FrtFieldInverter *fld_inv)
|
|
4445
3976
|
*
|
4446
3977
|
****************************************************************************/
|
4447
3978
|
|
4448
|
-
typedef struct SkipBuffer
|
4449
|
-
{
|
3979
|
+
typedef struct SkipBuffer {
|
4450
3980
|
FrtOutStream *buf;
|
4451
3981
|
FrtOutStream *frq_out;
|
4452
3982
|
FrtOutStream *prx_out;
|
@@ -4455,16 +3985,14 @@ typedef struct SkipBuffer
|
|
4455
3985
|
frt_off_t last_prx_ptr;
|
4456
3986
|
} SkipBuffer;
|
4457
3987
|
|
4458
|
-
static void skip_buf_reset(SkipBuffer *skip_buf)
|
4459
|
-
{
|
3988
|
+
static void skip_buf_reset(SkipBuffer *skip_buf) {
|
4460
3989
|
frt_ramo_reset(skip_buf->buf);
|
4461
3990
|
skip_buf->last_doc = 0;
|
4462
3991
|
skip_buf->last_frq_ptr = frt_os_pos(skip_buf->frq_out);
|
4463
3992
|
skip_buf->last_prx_ptr = frt_os_pos(skip_buf->prx_out);
|
4464
3993
|
}
|
4465
3994
|
|
4466
|
-
static SkipBuffer *skip_buf_new(FrtOutStream *frq_out, FrtOutStream *prx_out)
|
4467
|
-
{
|
3995
|
+
static SkipBuffer *skip_buf_new(FrtOutStream *frq_out, FrtOutStream *prx_out) {
|
4468
3996
|
SkipBuffer *skip_buf = FRT_ALLOC(SkipBuffer);
|
4469
3997
|
skip_buf->buf = frt_ram_new_buffer();
|
4470
3998
|
skip_buf->frq_out = frq_out;
|
@@ -4472,8 +4000,7 @@ static SkipBuffer *skip_buf_new(FrtOutStream *frq_out, FrtOutStream *prx_out)
|
|
4472
4000
|
return skip_buf;
|
4473
4001
|
}
|
4474
4002
|
|
4475
|
-
static void skip_buf_add(SkipBuffer *skip_buf, int doc)
|
4476
|
-
{
|
4003
|
+
static void skip_buf_add(SkipBuffer *skip_buf, int doc) {
|
4477
4004
|
frt_off_t frq_ptr = frt_os_pos(skip_buf->frq_out);
|
4478
4005
|
frt_off_t prx_ptr = frt_os_pos(skip_buf->prx_out);
|
4479
4006
|
|
@@ -4486,15 +4013,13 @@ static void skip_buf_add(SkipBuffer *skip_buf, int doc)
|
|
4486
4013
|
skip_buf->last_prx_ptr = prx_ptr;
|
4487
4014
|
}
|
4488
4015
|
|
4489
|
-
static frt_off_t skip_buf_write(SkipBuffer *skip_buf)
|
4490
|
-
{
|
4016
|
+
static frt_off_t skip_buf_write(SkipBuffer *skip_buf) {
|
4491
4017
|
frt_off_t skip_ptr = frt_os_pos(skip_buf->frq_out);
|
4492
4018
|
frt_ramo_write_to(skip_buf->buf, skip_buf->frq_out);
|
4493
4019
|
return skip_ptr;
|
4494
4020
|
}
|
4495
4021
|
|
4496
|
-
static void skip_buf_destroy(SkipBuffer *skip_buf)
|
4497
|
-
{
|
4022
|
+
static void skip_buf_destroy(SkipBuffer *skip_buf) {
|
4498
4023
|
frt_ram_destroy_buffer(skip_buf->buf);
|
4499
4024
|
free(skip_buf);
|
4500
4025
|
}
|
@@ -4505,21 +4030,19 @@ static void skip_buf_destroy(SkipBuffer *skip_buf)
|
|
4505
4030
|
*
|
4506
4031
|
****************************************************************************/
|
4507
4032
|
|
4508
|
-
static void dw_write_norms(FrtDocWriter *dw, FrtFieldInverter *fld_inv)
|
4509
|
-
{
|
4033
|
+
static void dw_write_norms(FrtDocWriter *dw, FrtFieldInverter *fld_inv) {
|
4510
4034
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
4511
4035
|
FrtOutStream *norms_out;
|
4512
4036
|
frt_si_advance_norm_gen(dw->si, fld_inv->fi->number);
|
4513
4037
|
si_norm_file_name(dw->si, file_name, fld_inv->fi->number);
|
4514
|
-
norms_out = dw->store->new_output(dw->store, file_name);
|
4038
|
+
norms_out = dw->store->new_output(dw->store, segm_idx_name, file_name);
|
4515
4039
|
frt_os_write_bytes(norms_out, fld_inv->norms, dw->doc_num);
|
4516
4040
|
frt_os_close(norms_out);
|
4517
4041
|
}
|
4518
4042
|
|
4519
4043
|
/* we'll use the postings Hash's table area to sort the postings as it is
|
4520
4044
|
* going to be zeroset soon anyway */
|
4521
|
-
static FrtPostingList **dw_sort_postings(FrtHash *plists_ht)
|
4522
|
-
{
|
4045
|
+
static FrtPostingList **dw_sort_postings(FrtHash *plists_ht) {
|
4523
4046
|
int i, j;
|
4524
4047
|
FrtHashEntry *he;
|
4525
4048
|
FrtPostingList **plists = (FrtPostingList **)plists_ht->table;
|
@@ -4537,8 +4060,7 @@ static FrtPostingList **dw_sort_postings(FrtHash *plists_ht)
|
|
4537
4060
|
return plists;
|
4538
4061
|
}
|
4539
4062
|
|
4540
|
-
static void dw_flush_streams(FrtDocWriter *dw)
|
4541
|
-
{
|
4063
|
+
static void dw_flush_streams(FrtDocWriter *dw) {
|
4542
4064
|
frt_mp_reset(dw->mp);
|
4543
4065
|
frt_fw_close(dw->fw);
|
4544
4066
|
dw->fw = NULL;
|
@@ -4546,8 +4068,7 @@ static void dw_flush_streams(FrtDocWriter *dw)
|
|
4546
4068
|
dw->doc_num = 0;
|
4547
4069
|
}
|
4548
4070
|
|
4549
|
-
static void dw_flush(FrtDocWriter *dw)
|
4550
|
-
{
|
4071
|
+
static void dw_flush(FrtDocWriter *dw) {
|
4551
4072
|
int i, j, last_doc, doc_code, doc_freq, last_pos, posting_count;
|
4552
4073
|
int skip_interval = dw->skip_interval;
|
4553
4074
|
FrtFieldInfos *fis = dw->fis;
|
@@ -4565,9 +4086,9 @@ static void dw_flush(FrtDocWriter *dw)
|
|
4565
4086
|
SkipBuffer *skip_buf;
|
4566
4087
|
|
4567
4088
|
sprintf(file_name, "%s.frq", dw->si->name);
|
4568
|
-
frq_out = store->new_output(store, file_name);
|
4089
|
+
frq_out = store->new_output(store, segm_idx_name, file_name);
|
4569
4090
|
sprintf(file_name, "%s.prx", dw->si->name);
|
4570
|
-
prx_out = store->new_output(store, file_name);
|
4091
|
+
prx_out = store->new_output(store, segm_idx_name, file_name);
|
4571
4092
|
skip_buf = skip_buf_new(frq_out, prx_out);
|
4572
4093
|
|
4573
4094
|
for (i = 0; i < fields_count; i++) {
|
@@ -4601,8 +4122,7 @@ static void dw_flush(FrtDocWriter *dw)
|
|
4601
4122
|
|
4602
4123
|
if (p->freq == 1) {
|
4603
4124
|
frt_os_write_vint(frq_out, 1|doc_code);
|
4604
|
-
}
|
4605
|
-
else {
|
4125
|
+
} else {
|
4606
4126
|
frt_os_write_vint(frq_out, doc_code);
|
4607
4127
|
frt_os_write_vint(frq_out, p->freq);
|
4608
4128
|
}
|
@@ -4625,8 +4145,7 @@ static void dw_flush(FrtDocWriter *dw)
|
|
4625
4145
|
dw_flush_streams(dw);
|
4626
4146
|
}
|
4627
4147
|
|
4628
|
-
FrtDocWriter *frt_dw_open(FrtIndexWriter *iw, FrtSegmentInfo *si)
|
4629
|
-
{
|
4148
|
+
FrtDocWriter *frt_dw_open(FrtIndexWriter *iw, FrtSegmentInfo *si) {
|
4630
4149
|
FrtStore *store = iw->store;
|
4631
4150
|
FrtMemoryPool *mp = frt_mp_new_capa(iw->config.chunk_size,
|
4632
4151
|
iw->config.max_buffer_memory/iw->config.chunk_size);
|
@@ -4658,14 +4177,12 @@ FrtDocWriter *frt_dw_open(FrtIndexWriter *iw, FrtSegmentInfo *si)
|
|
4658
4177
|
return dw;
|
4659
4178
|
}
|
4660
4179
|
|
4661
|
-
void frt_dw_new_segment(FrtDocWriter *dw, FrtSegmentInfo *si)
|
4662
|
-
{
|
4180
|
+
void frt_dw_new_segment(FrtDocWriter *dw, FrtSegmentInfo *si) {
|
4663
4181
|
dw->fw = frt_fw_open(dw->store, si->name, dw->fis);
|
4664
4182
|
dw->si = si;
|
4665
4183
|
}
|
4666
4184
|
|
4667
|
-
void frt_dw_close(FrtDocWriter *dw)
|
4668
|
-
{
|
4185
|
+
void frt_dw_close(FrtDocWriter *dw) {
|
4669
4186
|
if (dw->doc_num) {
|
4670
4187
|
dw_flush(dw);
|
4671
4188
|
}
|
@@ -4680,8 +4197,7 @@ void frt_dw_close(FrtDocWriter *dw)
|
|
4680
4197
|
free(dw);
|
4681
4198
|
}
|
4682
4199
|
|
4683
|
-
FrtFieldInverter *frt_dw_get_fld_inv(FrtDocWriter *dw, FrtFieldInfo *fi)
|
4684
|
-
{
|
4200
|
+
FrtFieldInverter *frt_dw_get_fld_inv(FrtDocWriter *dw, FrtFieldInfo *fi) {
|
4685
4201
|
FrtFieldInverter *fld_inv = (FrtFieldInverter*)frt_h_get_int(dw->fields, fi->number);
|
4686
4202
|
|
4687
4203
|
if (!fld_inv) {
|
@@ -4697,8 +4213,7 @@ static void dw_add_posting(FrtMemoryPool *mp,
|
|
4697
4213
|
int doc_num,
|
4698
4214
|
const char *text,
|
4699
4215
|
int len,
|
4700
|
-
int pos)
|
4701
|
-
{
|
4216
|
+
int pos) {
|
4702
4217
|
FrtHashEntry *pl_he;
|
4703
4218
|
if (frt_h_set_ext(curr_plists, text, &pl_he)) {
|
4704
4219
|
FrtPosting *p = frt_p_new(mp, doc_num, pos);
|
@@ -4708,21 +4223,18 @@ static void dw_add_posting(FrtMemoryPool *mp,
|
|
4708
4223
|
if (frt_h_set_ext(fld_plists, text, &fld_pl_he)) {
|
4709
4224
|
fld_pl_he->value = pl = frt_pl_new(mp, text, len, p);
|
4710
4225
|
pl_he->key = fld_pl_he->key = (char *)pl->term;
|
4711
|
-
}
|
4712
|
-
else {
|
4226
|
+
} else {
|
4713
4227
|
pl = (FrtPostingList *)fld_pl_he->value;
|
4714
4228
|
pl_add_posting(pl, p);
|
4715
4229
|
pl_he->key = (char *)pl->term;
|
4716
4230
|
}
|
4717
4231
|
pl_he->value = pl;
|
4718
|
-
}
|
4719
|
-
else {
|
4232
|
+
} else {
|
4720
4233
|
frt_pl_add_occ(mp, (FrtPostingList *)pl_he->value, pos);
|
4721
4234
|
}
|
4722
4235
|
}
|
4723
4236
|
|
4724
|
-
static void dw_add_offsets(FrtDocWriter *dw, int pos, frt_off_t start, frt_off_t end)
|
4725
|
-
{
|
4237
|
+
static void dw_add_offsets(FrtDocWriter *dw, int pos, frt_off_t start, frt_off_t end) {
|
4726
4238
|
if (pos >= dw->offsets_capa) {
|
4727
4239
|
int old_capa = dw->offsets_capa;
|
4728
4240
|
while (pos >= dw->offsets_capa) {
|
@@ -4786,7 +4298,7 @@ FrtHash *frt_dw_invert_field(FrtDocWriter *dw, FrtFieldInverter *fld_inv, FrtDoc
|
|
4786
4298
|
buf[FRT_MAX_WORD_SIZE - 1] = '\0';
|
4787
4299
|
for (i = 0; i < df_size; i++) {
|
4788
4300
|
int len = df->lengths[i];
|
4789
|
-
char *data_ptr = df->data[i];
|
4301
|
+
const char *data_ptr = df->data[i];
|
4790
4302
|
if (len >= FRT_MAX_WORD_SIZE) {
|
4791
4303
|
char *head_last = rb_enc_left_char_head(data_ptr, data_ptr + FRT_MAX_WORD_SIZE - 1, data_ptr + len, df->encodings[i]);
|
4792
4304
|
len = head_last - data_ptr;
|
@@ -4828,7 +4340,7 @@ void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc) {
|
|
4828
4340
|
FrtFieldInverter *fld_inv;
|
4829
4341
|
FrtHash *postings;
|
4830
4342
|
FrtFieldInfo *fi;
|
4831
|
-
const int doc_size = doc->
|
4343
|
+
const int doc_size = doc->field_count;
|
4832
4344
|
|
4833
4345
|
/* frt_fw_add_doc will add new fields as necessary */
|
4834
4346
|
frt_fw_add_doc(dw->fw, doc);
|
@@ -4867,23 +4379,21 @@ void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc) {
|
|
4867
4379
|
****************************************************************************/
|
4868
4380
|
|
4869
4381
|
/* prepare an index ready for writing */
|
4870
|
-
void frt_index_create(FrtStore *store, FrtFieldInfos *fis)
|
4871
|
-
{
|
4382
|
+
void frt_index_create(FrtStore *store, FrtFieldInfos *fis) {
|
4872
4383
|
FrtSegmentInfos *sis = frt_sis_new(fis);
|
4873
|
-
store->clear_all(store);
|
4384
|
+
store->clear_all(store, segm_idx_name);
|
4874
4385
|
frt_sis_write(sis, store, NULL);
|
4875
4386
|
frt_sis_destroy(sis);
|
4876
4387
|
}
|
4877
4388
|
|
4878
4389
|
bool frt_index_is_locked(FrtStore *store) {
|
4879
|
-
FrtLock *write_lock = frt_open_lock(store, FRT_WRITE_LOCK_NAME);
|
4390
|
+
FrtLock *write_lock = frt_open_lock(store, segm_idx_name, FRT_WRITE_LOCK_NAME);
|
4880
4391
|
bool is_locked = write_lock->is_locked(write_lock);
|
4881
4392
|
frt_close_lock(write_lock);
|
4882
4393
|
return is_locked;
|
4883
4394
|
}
|
4884
4395
|
|
4885
|
-
int frt_iw_doc_count(FrtIndexWriter *iw)
|
4886
|
-
{
|
4396
|
+
int frt_iw_doc_count(FrtIndexWriter *iw) {
|
4887
4397
|
int i, doc_cnt = 0;
|
4888
4398
|
pthread_mutex_lock(&iw->mutex);
|
4889
4399
|
for (i = iw->sis->size - 1; i >= 0; i--) {
|
@@ -4909,13 +4419,11 @@ static void iw_flush_ram_segment(FrtIndexWriter *iw) {
|
|
4909
4419
|
pthread_mutex_unlock(&iw->store->mutex);
|
4910
4420
|
}
|
4911
4421
|
|
4912
|
-
void frt_iw_add_doc(FrtIndexWriter *iw, FrtDocument *doc)
|
4913
|
-
{
|
4422
|
+
void frt_iw_add_doc(FrtIndexWriter *iw, FrtDocument *doc) {
|
4914
4423
|
pthread_mutex_lock(&iw->mutex);
|
4915
4424
|
if (NULL == iw->dw) {
|
4916
4425
|
iw->dw = frt_dw_open(iw, frt_sis_new_segment(iw->sis, 0, iw->store));
|
4917
|
-
}
|
4918
|
-
else if (NULL == iw->dw->fw) {
|
4426
|
+
} else if (NULL == iw->dw->fw) {
|
4919
4427
|
frt_dw_new_segment(iw->dw, frt_sis_new_segment(iw->sis, 0, iw->store));
|
4920
4428
|
}
|
4921
4429
|
frt_dw_add_doc(iw->dw, doc);
|
@@ -4926,15 +4434,13 @@ void frt_iw_add_doc(FrtIndexWriter *iw, FrtDocument *doc)
|
|
4926
4434
|
pthread_mutex_unlock(&iw->mutex);
|
4927
4435
|
}
|
4928
4436
|
|
4929
|
-
static void iw_commit_i(FrtIndexWriter *iw)
|
4930
|
-
{
|
4437
|
+
static void iw_commit_i(FrtIndexWriter *iw) {
|
4931
4438
|
if (iw->dw && iw->dw->doc_num > 0) {
|
4932
4439
|
iw_flush_ram_segment(iw);
|
4933
4440
|
}
|
4934
4441
|
}
|
4935
4442
|
|
4936
|
-
void frt_iw_commit(FrtIndexWriter *iw)
|
4937
|
-
{
|
4443
|
+
void frt_iw_commit(FrtIndexWriter *iw) {
|
4938
4444
|
pthread_mutex_lock(&iw->mutex);
|
4939
4445
|
iw_commit_i(iw);
|
4940
4446
|
pthread_mutex_unlock(&iw->mutex);
|
@@ -5010,8 +4516,7 @@ void frt_iw_delete_terms(FrtIndexWriter *iw, ID field, char **terms, const int t
|
|
5010
4516
|
}
|
5011
4517
|
}
|
5012
4518
|
|
5013
|
-
void frt_iw_close(FrtIndexWriter *iw)
|
5014
|
-
{
|
4519
|
+
void frt_iw_close(FrtIndexWriter *iw) {
|
5015
4520
|
pthread_mutex_lock(&iw->mutex);
|
5016
4521
|
iw_commit_i(iw);
|
5017
4522
|
if (iw->dw) {
|
@@ -5047,7 +4552,7 @@ FrtIndexWriter *frt_iw_open(FrtIndexWriter *iw, FrtStore *store, FrtAnalyzer *vo
|
|
5047
4552
|
iw->config = *config;
|
5048
4553
|
|
5049
4554
|
FRT_TRY
|
5050
|
-
iw->write_lock = frt_open_lock(store, FRT_WRITE_LOCK_NAME);
|
4555
|
+
iw->write_lock = frt_open_lock(store, segm_idx_name, FRT_WRITE_LOCK_NAME);
|
5051
4556
|
if (!iw->write_lock->obtain(iw->write_lock)) {
|
5052
4557
|
FRT_RAISE(FRT_LOCK_ERROR, "Couldn't obtain write lock when opening IndexWriter");
|
5053
4558
|
}
|
@@ -5089,21 +4594,21 @@ static void iw_cp_fields(FrtIndexWriter *iw, FrtSegmentReader *sr, const char *s
|
|
5089
4594
|
char *sr_segment = sr->si->name;
|
5090
4595
|
|
5091
4596
|
sprintf(file_name, "%s.fdt", segment);
|
5092
|
-
fdt_out = store_out->new_output(store_out, file_name);
|
4597
|
+
fdt_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
5093
4598
|
sprintf(file_name, "%s.fdx", segment);
|
5094
|
-
fdx_out = store_out->new_output(store_out, file_name);
|
4599
|
+
fdx_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
5095
4600
|
|
5096
4601
|
sprintf(file_name, "%s.fdt", sr_segment);
|
5097
|
-
fdt_in = store_in->open_input(store_in, file_name);
|
4602
|
+
fdt_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
5098
4603
|
sprintf(file_name, "%s.fdx", sr_segment);
|
5099
|
-
fdx_in = store_in->open_input(store_in, file_name);
|
4604
|
+
fdx_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
5100
4605
|
|
5101
4606
|
sprintf(file_name, "%s.del", sr_segment);
|
5102
|
-
if (store_in->exists(store_in, file_name)) {
|
4607
|
+
if (store_in->exists(store_in, segm_idx_name, file_name)) {
|
5103
4608
|
FrtOutStream *del_out;
|
5104
|
-
FrtInStream *del_in = store_in->open_input(store_in, file_name);
|
4609
|
+
FrtInStream *del_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
5105
4610
|
sprintf(file_name, "%s.del", segment);
|
5106
|
-
del_out = store_out->new_output(store_out, file_name);
|
4611
|
+
del_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
5107
4612
|
frt_is2os_copy_bytes(del_in, del_out, frt_is_length(del_in));
|
5108
4613
|
frt_os_close(del_out);
|
5109
4614
|
frt_is_close(del_in);
|
@@ -5170,8 +4675,7 @@ static void iw_cp_fields(FrtIndexWriter *iw, FrtSegmentReader *sr, const char *s
|
|
5170
4675
|
}
|
5171
4676
|
|
5172
4677
|
static void iw_cp_terms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
5173
|
-
const char *segment, int *map)
|
5174
|
-
{
|
4678
|
+
const char *segment, int *map) {
|
5175
4679
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
5176
4680
|
FrtOutStream *tix_out, *tis_out, *tfx_out, *frq_out, *prx_out;
|
5177
4681
|
FrtInStream *tix_in, *tis_in, *tfx_in, *frq_in, *prx_in;
|
@@ -5180,29 +4684,29 @@ static void iw_cp_terms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
|
5180
4684
|
char *sr_segment = sr->si->name;
|
5181
4685
|
|
5182
4686
|
sprintf(file_name, "%s.tix", segment);
|
5183
|
-
tix_out = store_out->new_output(store_out, file_name);
|
4687
|
+
tix_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
5184
4688
|
sprintf(file_name, "%s.tix", sr_segment);
|
5185
|
-
tix_in = store_in->open_input(store_in, file_name);
|
4689
|
+
tix_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
5186
4690
|
|
5187
4691
|
sprintf(file_name, "%s.tis", segment);
|
5188
|
-
tis_out = store_out->new_output(store_out, file_name);
|
4692
|
+
tis_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
5189
4693
|
sprintf(file_name, "%s.tis", sr_segment);
|
5190
|
-
tis_in = store_in->open_input(store_in, file_name);
|
4694
|
+
tis_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
5191
4695
|
|
5192
4696
|
sprintf(file_name, "%s.tfx", segment);
|
5193
|
-
tfx_out = store_out->new_output(store_out, file_name);
|
4697
|
+
tfx_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
5194
4698
|
sprintf(file_name, "%s.tfx", sr_segment);
|
5195
|
-
tfx_in = store_in->open_input(store_in, file_name);
|
4699
|
+
tfx_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
5196
4700
|
|
5197
4701
|
sprintf(file_name, "%s.frq", segment);
|
5198
|
-
frq_out = store_out->new_output(store_out, file_name);
|
4702
|
+
frq_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
5199
4703
|
sprintf(file_name, "%s.frq", sr_segment);
|
5200
|
-
frq_in = store_in->open_input(store_in, file_name);
|
4704
|
+
frq_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
5201
4705
|
|
5202
4706
|
sprintf(file_name, "%s.prx", segment);
|
5203
|
-
prx_out = store_out->new_output(store_out, file_name);
|
4707
|
+
prx_out = store_out->new_output(store_out, segm_idx_name, file_name);
|
5204
4708
|
sprintf(file_name, "%s.prx", sr_segment);
|
5205
|
-
prx_in = store_in->open_input(store_in, file_name);
|
4709
|
+
prx_in = store_in->open_input(store_in, segm_idx_name, file_name);
|
5206
4710
|
|
5207
4711
|
if (map) {
|
5208
4712
|
int field_cnt = frt_is_read_u32(tfx_in);
|
@@ -5217,8 +4721,7 @@ static void iw_cp_terms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
|
5217
4721
|
frt_os_write_vint(tfx_out, frt_is_read_vint(tfx_in)); /* index size */
|
5218
4722
|
frt_os_write_vint(tfx_out, frt_is_read_vint(tfx_in)); /* dict size */
|
5219
4723
|
}
|
5220
|
-
}
|
5221
|
-
else {
|
4724
|
+
} else {
|
5222
4725
|
frt_is2os_copy_bytes(tfx_in, tfx_out, frt_is_length(tfx_in));
|
5223
4726
|
}
|
5224
4727
|
frt_is2os_copy_bytes(tix_in, tix_out, frt_is_length(tix_in));
|
@@ -5239,8 +4742,7 @@ static void iw_cp_terms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
|
5239
4742
|
}
|
5240
4743
|
|
5241
4744
|
static void iw_cp_norms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
5242
|
-
FrtSegmentInfo *si, int *map)
|
5243
|
-
{
|
4745
|
+
FrtSegmentInfo *si, int *map) {
|
5244
4746
|
int i;
|
5245
4747
|
FrtFieldInfos *fis = IR(sr)->fis;
|
5246
4748
|
const int field_cnt = fis->size;
|
@@ -5256,10 +4758,10 @@ static void iw_cp_norms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
|
5256
4758
|
FrtStore *store = IR(sr)->store;
|
5257
4759
|
int field_num = map ? map[i] : i;
|
5258
4760
|
|
5259
|
-
norms_in = store->open_input(store, file_name_in);
|
4761
|
+
norms_in = store->open_input(store, segm_idx_name, file_name_in);
|
5260
4762
|
frt_si_advance_norm_gen(si, field_num);
|
5261
4763
|
si_norm_file_name(si, file_name_out, field_num);
|
5262
|
-
norms_out = store_out->new_output(store_out, file_name_out);
|
4764
|
+
norms_out = store_out->new_output(store_out, segm_idx_name, file_name_out);
|
5263
4765
|
frt_is2os_copy_bytes(norms_in, norms_out, frt_is_length(norms_in));
|
5264
4766
|
frt_os_close(norms_out);
|
5265
4767
|
frt_is_close(norms_in);
|