isomorfeus-ferret 0.14.3 → 0.14.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/frb_field_info.c +65 -55
- data/ext/isomorfeus_ferret_ext/frb_index.c +21 -19
- data/ext/isomorfeus_ferret_ext/frt_ind.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_index.c +66 -145
- data/ext/isomorfeus_ferret_ext/frt_index.h +24 -47
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_1710.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_fields.c +39 -39
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_filter.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_index.c +36 -36
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_q_span.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_search.c +4 -4
- data/ext/isomorfeus_ferret_ext/test_segments.c +1 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +6 -6
- data/ext/isomorfeus_ferret_ext/test_threading.c +2 -2
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +2 -2
@@ -44,7 +44,7 @@ const FrtConfig frt_default_config = {
|
|
44
44
|
static void ste_reset(FrtTermEnum *te);
|
45
45
|
static char *ste_next(FrtTermEnum *te);
|
46
46
|
|
47
|
-
#define FORMAT
|
47
|
+
#define FORMAT 15
|
48
48
|
#define SEGMENTS_GEN_FILE_NAME "segments"
|
49
49
|
#define MAX_EXT_LEN 10
|
50
50
|
#define FRT_COMPRESSION_BUFFER_SIZE 16348
|
@@ -213,78 +213,11 @@ FrtHash *frt_co_hash_create(void) {
|
|
213
213
|
*
|
214
214
|
****************************************************************************/
|
215
215
|
|
216
|
-
static void
|
217
|
-
|
218
|
-
case FRT_STORE_NO:
|
219
|
-
break;
|
220
|
-
case FRT_STORE_YES:
|
221
|
-
fi->bits |= FRT_FI_IS_STORED_BM;
|
222
|
-
break;
|
223
|
-
}
|
224
|
-
}
|
225
|
-
|
226
|
-
static void fi_set_compression(FrtFieldInfo *fi, FrtCompressionType compression) {
|
227
|
-
switch (compression) {
|
228
|
-
case FRT_COMPRESSION_NONE:
|
229
|
-
break;
|
230
|
-
case FRT_COMPRESSION_BROTLI:
|
231
|
-
fi->bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_BROTLI_BM;
|
232
|
-
break;
|
233
|
-
case FRT_COMPRESSION_BZ2:
|
234
|
-
fi->bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_BZ2_BM;
|
235
|
-
break;
|
236
|
-
case FRT_COMPRESSION_LZ4:
|
237
|
-
fi->bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_LZ4_BM;
|
238
|
-
break;
|
239
|
-
}
|
240
|
-
}
|
241
|
-
|
242
|
-
static void fi_set_index(FrtFieldInfo *fi, FrtIndexValue index) {
|
243
|
-
switch (index) {
|
244
|
-
case FRT_INDEX_NO:
|
245
|
-
break;
|
246
|
-
case FRT_INDEX_YES:
|
247
|
-
fi->bits |= FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM;
|
248
|
-
break;
|
249
|
-
case FRT_INDEX_UNTOKENIZED:
|
250
|
-
fi->bits |= FRT_FI_IS_INDEXED_BM;
|
251
|
-
break;
|
252
|
-
case FRT_INDEX_YES_OMIT_NORMS:
|
253
|
-
fi->bits |= FRT_FI_OMIT_NORMS_BM | FRT_FI_IS_INDEXED_BM |
|
254
|
-
FRT_FI_IS_TOKENIZED_BM;
|
255
|
-
break;
|
256
|
-
case FRT_INDEX_UNTOKENIZED_OMIT_NORMS:
|
257
|
-
fi->bits |= FRT_FI_OMIT_NORMS_BM | FRT_FI_IS_INDEXED_BM;
|
258
|
-
break;
|
259
|
-
}
|
260
|
-
}
|
261
|
-
|
262
|
-
static void fi_set_term_vector(FrtFieldInfo *fi, FrtTermVectorValue term_vector) {
|
263
|
-
switch (term_vector) {
|
264
|
-
case FRT_TERM_VECTOR_NO:
|
265
|
-
break;
|
266
|
-
case FRT_TERM_VECTOR_YES:
|
267
|
-
fi->bits |= FRT_FI_STORE_TERM_VECTOR_BM;
|
268
|
-
break;
|
269
|
-
case FRT_TERM_VECTOR_WITH_POSITIONS:
|
270
|
-
fi->bits |= FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM;
|
271
|
-
break;
|
272
|
-
case FRT_TERM_VECTOR_WITH_OFFSETS:
|
273
|
-
fi->bits |= FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_OFFSETS_BM;
|
274
|
-
break;
|
275
|
-
case FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS:
|
276
|
-
fi->bits |= FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM |
|
277
|
-
FRT_FI_STORE_OFFSETS_BM;
|
278
|
-
break;
|
279
|
-
}
|
280
|
-
}
|
281
|
-
|
282
|
-
static void fi_check_params(FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector) {
|
283
|
-
(void)store;
|
284
|
-
if ((index == FRT_INDEX_NO) && (term_vector != FRT_TERM_VECTOR_NO)) {
|
216
|
+
static void fi_check_params(unsigned int bits) {
|
217
|
+
if (!bits_is_indexed(bits) && bits_store_term_vector(bits)) {
|
285
218
|
FRT_RAISE(FRT_ARG_ERROR, "You can't store the term vectors of an unindexed field.");
|
286
219
|
}
|
287
|
-
if ((
|
220
|
+
if (bits_is_compressed(bits) && !bits_is_stored(bits)) {
|
288
221
|
FRT_RAISE(FRT_ARG_ERROR, "Field must be stored for compression to be useful.");
|
289
222
|
}
|
290
223
|
}
|
@@ -293,25 +226,21 @@ FrtFieldInfo *frt_fi_alloc(void) {
|
|
293
226
|
return FRT_ALLOC(FrtFieldInfo);
|
294
227
|
}
|
295
228
|
|
296
|
-
FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name,
|
229
|
+
FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, unsigned int bits) {
|
297
230
|
assert(NULL != name);
|
298
|
-
fi_check_params(
|
231
|
+
fi_check_params(bits);
|
299
232
|
fi->name = name;
|
300
233
|
fi->boost = 1.0f;
|
301
|
-
fi->bits =
|
302
|
-
fi_set_store(fi, store);
|
303
|
-
fi_set_compression(fi, compression);
|
304
|
-
fi_set_index(fi, index);
|
305
|
-
fi_set_term_vector(fi, term_vector);
|
234
|
+
fi->bits = bits;
|
306
235
|
fi->number = 0;
|
307
236
|
fi->ref_cnt = 1;
|
308
237
|
fi->rfi = Qnil;
|
309
238
|
return fi;
|
310
239
|
}
|
311
240
|
|
312
|
-
FrtFieldInfo *frt_fi_new(ID name,
|
241
|
+
FrtFieldInfo *frt_fi_new(ID name, unsigned int bits) {
|
313
242
|
FrtFieldInfo *fi = frt_fi_alloc();
|
314
|
-
return frt_fi_init(fi, name,
|
243
|
+
return frt_fi_init(fi, name, bits);
|
315
244
|
}
|
316
245
|
|
317
246
|
void frt_fi_deref(FrtFieldInfo *fi) {
|
@@ -319,12 +248,12 @@ void frt_fi_deref(FrtFieldInfo *fi) {
|
|
319
248
|
}
|
320
249
|
|
321
250
|
FrtCompressionType frt_fi_get_compression(FrtFieldInfo *fi) {
|
322
|
-
if (
|
323
|
-
if (
|
251
|
+
if (bits_is_compressed(fi->bits)) {
|
252
|
+
if (bits_is_compressed_brotli(fi->bits)) {
|
324
253
|
return FRT_COMPRESSION_BROTLI;
|
325
|
-
} else if (
|
254
|
+
} else if (bits_is_compressed_bz2(fi->bits)) {
|
326
255
|
return FRT_COMPRESSION_BZ2;
|
327
|
-
} else if (
|
256
|
+
} else if (bits_is_compressed_lz4(fi->bits)) {
|
328
257
|
return FRT_COMPRESSION_LZ4;
|
329
258
|
} else {
|
330
259
|
return FRT_COMPRESSION_BROTLI;
|
@@ -340,14 +269,14 @@ char *frt_fi_to_s(FrtFieldInfo *fi)
|
|
340
269
|
char *str = FRT_ALLOC_N(char, strlen(fi_name) + 200);
|
341
270
|
char *s = str;
|
342
271
|
s += sprintf(str, "[\"%s\":(%s%s%s%s%s%s%s%s", fi_name,
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
272
|
+
bits_is_stored(fi->bits) ? "is_stored, " : "",
|
273
|
+
bits_is_compressed(fi->bits) ? "is_compressed, " : "",
|
274
|
+
bits_is_indexed(fi->bits) ? "is_indexed, " : "",
|
275
|
+
bits_is_tokenized(fi->bits) ? "is_tokenized, " : "",
|
276
|
+
bits_omit_norms(fi->bits) ? "omit_norms, " : "",
|
277
|
+
bits_store_term_vector(fi->bits) ? "store_term_vector, " : "",
|
278
|
+
bits_store_positions(fi->bits) ? "store_positions, " : "",
|
279
|
+
bits_store_offsets(fi->bits) ? "store_offsets, " : "");
|
351
280
|
s -= 2;
|
352
281
|
if (*s != ',') {
|
353
282
|
s += 2;
|
@@ -367,24 +296,21 @@ FrtFieldInfos *frt_fis_alloc(void) {
|
|
367
296
|
return FRT_ALLOC(FrtFieldInfos);
|
368
297
|
}
|
369
298
|
|
370
|
-
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis,
|
371
|
-
fi_check_params(
|
299
|
+
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, unsigned int bits) {
|
300
|
+
fi_check_params(bits);
|
372
301
|
fis->field_dict = frt_h_new_ptr((frt_free_ft)&frt_fi_deref);
|
373
302
|
fis->size = 0;
|
374
303
|
fis->capa = FIELD_INFOS_INIT_CAPA;
|
375
304
|
fis->fields = FRT_ALLOC_N(FrtFieldInfo *, fis->capa);
|
376
|
-
fis->
|
377
|
-
fis->compression = compression;
|
378
|
-
fis->index = index;
|
379
|
-
fis->term_vector = term_vector;
|
305
|
+
fis->bits = bits;
|
380
306
|
fis->ref_cnt = 1;
|
381
307
|
fis->rfis = Qnil;
|
382
308
|
return fis;
|
383
309
|
}
|
384
310
|
|
385
|
-
FrtFieldInfos *frt_fis_new(
|
311
|
+
FrtFieldInfos *frt_fis_new(unsigned int bits) {
|
386
312
|
FrtFieldInfos *fis = frt_fis_alloc();
|
387
|
-
return frt_fis_init(fis,
|
313
|
+
return frt_fis_init(fis, bits);
|
388
314
|
}
|
389
315
|
|
390
316
|
FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi) {
|
@@ -415,7 +341,7 @@ int frt_fis_get_field_num(FrtFieldInfos *fis, ID name) {
|
|
415
341
|
FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name) {
|
416
342
|
FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
|
417
343
|
if (!fi) {
|
418
|
-
fi = (FrtFieldInfo*)frt_fi_new(name, fis->
|
344
|
+
fi = (FrtFieldInfo*)frt_fi_new(name, fis->bits);
|
419
345
|
frt_fis_add_field(fis, fi);
|
420
346
|
}
|
421
347
|
return fi;
|
@@ -427,14 +353,10 @@ FrtFieldInfos *frt_fis_read(FrtInStream *is)
|
|
427
353
|
char *field_name;
|
428
354
|
FRT_TRY
|
429
355
|
do {
|
430
|
-
FrtTermVectorValue term_vector_val;
|
431
356
|
volatile int i;
|
432
357
|
union { frt_u32 i; float f; } tmp;
|
433
358
|
FrtFieldInfo *volatile fi;
|
434
|
-
|
435
|
-
FrtIndexValue index_val = (FrtIndexValue)frt_is_read_vint(is);
|
436
|
-
term_vector_val = (FrtTermVectorValue)frt_is_read_vint(is);
|
437
|
-
fis = frt_fis_new(store_val, FRT_COMPRESSION_NONE, index_val, term_vector_val); // TODO compression, read from store?
|
359
|
+
fis = frt_fis_new(frt_is_read_vint(is));
|
438
360
|
for (i = frt_is_read_vint(is); i > 0; i--) {
|
439
361
|
fi = FRT_ALLOC_AND_ZERO(FrtFieldInfo);
|
440
362
|
FRT_TRY
|
@@ -464,9 +386,7 @@ void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os)
|
|
464
386
|
FrtFieldInfo *fi;
|
465
387
|
const int fis_size = fis->size;
|
466
388
|
|
467
|
-
frt_os_write_vint(os, fis->
|
468
|
-
frt_os_write_vint(os, fis->index);
|
469
|
-
frt_os_write_vint(os, fis->term_vector);
|
389
|
+
frt_os_write_vint(os, fis->bits);
|
470
390
|
frt_os_write_vint(os, fis->size);
|
471
391
|
|
472
392
|
for (i = 0; i < fis_size; i++) {
|
@@ -536,9 +456,9 @@ char *frt_fis_to_s(FrtFieldInfos *fis)
|
|
536
456
|
" index: %s\n"
|
537
457
|
" term_vector: %s\n"
|
538
458
|
"fields:\n",
|
539
|
-
store_str[fis->
|
540
|
-
index_str[fis->
|
541
|
-
term_vector_str[fis->
|
459
|
+
store_str[fis->bits & 0x3],
|
460
|
+
index_str[(fis->bits >> 2) & 0x7],
|
461
|
+
term_vector_str[(fis->bits >> 5) & 0x7]);
|
542
462
|
for (i = 0; i < fis_size; i++) {
|
543
463
|
fi = fis->fields[i];
|
544
464
|
pos += sprintf(buf + pos,
|
@@ -568,7 +488,7 @@ static bool fis_has_vectors(FrtFieldInfos *fis)
|
|
568
488
|
const int fis_size = fis->size;
|
569
489
|
|
570
490
|
for (i = 0; i < fis_size; i++) {
|
571
|
-
if (
|
491
|
+
if (bits_store_term_vector(fis->fields[i]->bits)) {
|
572
492
|
return true;
|
573
493
|
}
|
574
494
|
}
|
@@ -1047,6 +967,7 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReade
|
|
1047
967
|
{
|
1048
968
|
int seg_cnt;
|
1049
969
|
int i;
|
970
|
+
frt_u32 format = 0;
|
1050
971
|
volatile bool success = false;
|
1051
972
|
char seg_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
1052
973
|
FrtInStream *volatile is = NULL;
|
@@ -1058,7 +979,9 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReade
|
|
1058
979
|
sis->store = store;
|
1059
980
|
FRT_REF(store);
|
1060
981
|
sis->generation = fsf->generation;
|
1061
|
-
|
982
|
+
format = frt_is_read_u32(is);
|
983
|
+
if (format == FORMAT) sis->format = format;
|
984
|
+
else FRT_RAISE(FRT_EXCEPTION, "Wrong index format, required format '%u', format of given index '%u'", FORMAT, format);
|
1062
985
|
sis->version = frt_is_read_u64(is);
|
1063
986
|
sis->counter = frt_is_read_u64(is);
|
1064
987
|
seg_cnt = frt_is_read_vint(is);
|
@@ -1073,9 +996,7 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReade
|
|
1073
996
|
success = true;
|
1074
997
|
FRT_XFINALLY
|
1075
998
|
if (is) frt_is_close(is);
|
1076
|
-
if (!success)
|
1077
|
-
frt_sis_destroy(sis);
|
1078
|
-
}
|
999
|
+
if (!success) frt_sis_destroy(sis);
|
1079
1000
|
FRT_XENDTRY
|
1080
1001
|
fsf->ret.sis = sis;
|
1081
1002
|
}
|
@@ -1126,16 +1047,17 @@ void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter)
|
|
1126
1047
|
static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_)
|
1127
1048
|
{
|
1128
1049
|
FrtInStream *is;
|
1129
|
-
|
1050
|
+
frt_u32 format = 0;
|
1051
|
+
frt_u64 version = 0;
|
1130
1052
|
char seg_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
1131
1053
|
|
1132
1054
|
segfn_for_generation(seg_file_name, (frt_u64)fsf->generation);
|
1133
1055
|
is = store->open_input(store, seg_file_name);
|
1134
|
-
version = 0;
|
1135
1056
|
|
1136
1057
|
FRT_TRY
|
1137
|
-
frt_is_read_u32(is); // format
|
1138
|
-
version = frt_is_read_u64(is);
|
1058
|
+
format = frt_is_read_u32(is); // format
|
1059
|
+
if (format == FORMAT) version = frt_is_read_u64(is);
|
1060
|
+
else FRT_RAISE(FRT_EXCEPTION, "Wrong index format, required format '%u', format of given index '%u'", FORMAT, format);
|
1139
1061
|
FRT_XFINALLY
|
1140
1062
|
frt_is_close(is);
|
1141
1063
|
FRT_XENDTRY
|
@@ -1671,8 +1593,8 @@ static FrtTermVector *frt_fr_read_term_vector(FrtFieldsReader *fr, int field_num
|
|
1671
1593
|
|
1672
1594
|
if (num_terms > 0) {
|
1673
1595
|
int i, j, delta_start, delta_len, total_len, freq;
|
1674
|
-
int store_positions =
|
1675
|
-
int store_offsets =
|
1596
|
+
int store_positions = bits_store_positions(fi->bits);
|
1597
|
+
int store_offsets = bits_store_offsets(fi->bits);
|
1676
1598
|
frt_uchar buffer[FRT_MAX_WORD_SIZE];
|
1677
1599
|
FrtTVTerm *term;
|
1678
1600
|
|
@@ -1975,7 +1897,7 @@ void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc) {
|
|
1975
1897
|
|
1976
1898
|
for (i = 0; i < doc_size; i++) {
|
1977
1899
|
df = doc->fields[i];
|
1978
|
-
if (
|
1900
|
+
if (bits_is_stored(frt_fis_get_or_add_field(fw->fis, df->name)->bits)) {
|
1979
1901
|
stored_cnt++;
|
1980
1902
|
}
|
1981
1903
|
}
|
@@ -1989,12 +1911,12 @@ void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc) {
|
|
1989
1911
|
for (i = 0; i < doc_size; i++) {
|
1990
1912
|
df = doc->fields[i];
|
1991
1913
|
fi = frt_fis_get_field(fw->fis, df->name);
|
1992
|
-
if (
|
1914
|
+
if (bits_is_stored(fi->bits)) {
|
1993
1915
|
const int df_size = df->size;
|
1994
1916
|
frt_os_write_vint(fdt_out, fi->number);
|
1995
1917
|
frt_os_write_vint(fdt_out, df_size);
|
1996
1918
|
|
1997
|
-
if (
|
1919
|
+
if (bits_is_compressed(fi->bits)) {
|
1998
1920
|
compression = frt_fi_get_compression(fi);
|
1999
1921
|
for (j = 0; j < df_size; j++) {
|
2000
1922
|
const int length = df->lengths[j];
|
@@ -2048,7 +1970,7 @@ void frt_fw_add_postings(FrtFieldsWriter *fw,
|
|
2048
1970
|
FrtPosting *posting;
|
2049
1971
|
FrtOccurence *occ;
|
2050
1972
|
FrtFieldInfo *fi = fw->fis->fields[field_num];
|
2051
|
-
int store_positions =
|
1973
|
+
int store_positions = bits_store_positions(fi->bits);
|
2052
1974
|
|
2053
1975
|
frt_ary_grow(fw->tv_fields);
|
2054
1976
|
frt_ary_last(fw->tv_fields).field_num = field_num;
|
@@ -2080,7 +2002,7 @@ void frt_fw_add_postings(FrtFieldsWriter *fw,
|
|
2080
2002
|
|
2081
2003
|
}
|
2082
2004
|
|
2083
|
-
if (
|
2005
|
+
if (bits_store_offsets(fi->bits)) {
|
2084
2006
|
/* use delta encoding for offsets */
|
2085
2007
|
frt_i64 last_end = 0;
|
2086
2008
|
frt_os_write_vint(fdt_out, offset_count); /* write shared prefix length */
|
@@ -4042,8 +3964,8 @@ void frt_ir_commit(FrtIndexReader *ir)
|
|
4042
3964
|
|
4043
3965
|
void frt_ir_close(FrtIndexReader *ir) {
|
4044
3966
|
if (ir->ref_cnt == 0) {
|
4045
|
-
fprintf(stderr, "
|
4046
|
-
|
3967
|
+
fprintf(stderr, "Warning: IndexReader ref_cnt to low\n");
|
3968
|
+
return;
|
4047
3969
|
}
|
4048
3970
|
|
4049
3971
|
if (FRT_DEREF(ir) == 0) {
|
@@ -4285,7 +4207,7 @@ static void sr_commit_i(FrtIndexReader *ir)
|
|
4285
4207
|
FrtFieldInfo *fi;
|
4286
4208
|
for (i = field_cnt - 1; i >= 0; i--) {
|
4287
4209
|
fi = ir->fis->fields[i];
|
4288
|
-
if (
|
4210
|
+
if (bits_is_indexed(fi->bits)) {
|
4289
4211
|
Norm *norm = (Norm *)frt_h_get_int(SR(ir)->norms, fi->number);
|
4290
4212
|
if (norm && norm->is_dirty) {
|
4291
4213
|
norm_rewrite(norm, ir->store, ir->deleter, SR(ir)->si,
|
@@ -4415,7 +4337,7 @@ static FrtTermVector *sr_term_vector(FrtIndexReader *ir, int doc_num, ID field)
|
|
4415
4337
|
FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(ir->fis->field_dict, (void *)field);
|
4416
4338
|
FrtFieldsReader *fr;
|
4417
4339
|
|
4418
|
-
if (!fi || !
|
4340
|
+
if (!fi || !bits_store_term_vector(fi->bits) || !SR(ir)->fr || !(fr = sr_fr(SR(ir)))) {
|
4419
4341
|
return NULL;
|
4420
4342
|
}
|
4421
4343
|
|
@@ -4903,7 +4825,7 @@ FrtIndexReader *frt_mr_open(FrtIndexReader *ir, FrtIndexReader **sub_readers, co
|
|
4903
4825
|
ir = (FrtIndexReader *)frt_mr_init((FrtMultiReader *)ir, sub_readers, r_cnt);
|
4904
4826
|
FrtMultiReader *mr = MR(ir);
|
4905
4827
|
/* defaults don't matter, this is just for reading fields, not adding */
|
4906
|
-
FrtFieldInfos *fis = frt_fis_new(
|
4828
|
+
FrtFieldInfos *fis = frt_fis_new(0);
|
4907
4829
|
int i, j;
|
4908
4830
|
bool need_field_map = false;
|
4909
4831
|
|
@@ -5082,10 +5004,10 @@ int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2)
|
|
5082
5004
|
static FrtFieldInverter *fld_inv_new(FrtDocWriter *dw, FrtFieldInfo *fi)
|
5083
5005
|
{
|
5084
5006
|
FrtFieldInverter *fld_inv = FRT_MP_ALLOC(dw->mp, FrtFieldInverter);
|
5085
|
-
fld_inv->is_tokenized =
|
5086
|
-
fld_inv->store_term_vector =
|
5087
|
-
fld_inv->store_offsets =
|
5088
|
-
if ((fld_inv->has_norms =
|
5007
|
+
fld_inv->is_tokenized = bits_is_tokenized(fi->bits);
|
5008
|
+
fld_inv->store_term_vector = bits_store_term_vector(fi->bits);
|
5009
|
+
fld_inv->store_offsets = bits_store_offsets(fi->bits);
|
5010
|
+
if ((fld_inv->has_norms = bits_has_norms(fi->bits)) == true) {
|
5089
5011
|
fld_inv->norms = FRT_MP_ALLOC_AND_ZERO_N(dw->mp, frt_uchar,
|
5090
5012
|
dw->max_buffered_docs);
|
5091
5013
|
}
|
@@ -5235,11 +5157,11 @@ static void dw_flush(FrtDocWriter *dw)
|
|
5235
5157
|
|
5236
5158
|
for (i = 0; i < fields_count; i++) {
|
5237
5159
|
fi = fis->fields[i];
|
5238
|
-
if (!
|
5160
|
+
if (!bits_is_indexed(fi->bits) || NULL ==
|
5239
5161
|
(fld_inv = (FrtFieldInverter*)frt_h_get_int(dw->fields, fi->number))) {
|
5240
5162
|
continue;
|
5241
5163
|
}
|
5242
|
-
if (!
|
5164
|
+
if (!bits_omit_norms(fi->bits)) {
|
5243
5165
|
dw_write_norms(dw, fld_inv);
|
5244
5166
|
}
|
5245
5167
|
|
@@ -5499,7 +5421,7 @@ void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc) {
|
|
5499
5421
|
for (i = 0; i < doc_size; i++) {
|
5500
5422
|
df = doc->fields[i];
|
5501
5423
|
fi = frt_fis_get_field(dw->fis, df->name);
|
5502
|
-
if (!
|
5424
|
+
if (!bits_is_indexed(fi->bits)) {
|
5503
5425
|
continue;
|
5504
5426
|
}
|
5505
5427
|
fld_inv = frt_dw_get_fld_inv(dw, fi);
|
@@ -5941,7 +5863,7 @@ static void sm_merge_norms(SegmentMerger *sm)
|
|
5941
5863
|
const int seg_cnt = sm->seg_cnt;
|
5942
5864
|
for (i = sm->fis->size - 1; i >= 0; i--) {
|
5943
5865
|
fi = sm->fis->fields[i];
|
5944
|
-
if (
|
5866
|
+
if (bits_has_norms(fi->bits)) {
|
5945
5867
|
si = sm->si;
|
5946
5868
|
frt_si_advance_norm_gen(si, i);
|
5947
5869
|
si_norm_file_name(si, file_name, i);
|
@@ -6042,7 +5964,7 @@ static void iw_create_compound_file(FrtStore *store, FrtFieldInfos *fis, FrtSegm
|
|
6042
5964
|
|
6043
5965
|
/* Field norm file_names */
|
6044
5966
|
for (i = fis->size - 1; i >= 0; i--) {
|
6045
|
-
if (
|
5967
|
+
if (bits_has_norms(fis->fields[i]->bits) && si_norm_file_name(si, file_name, i)) {
|
6046
5968
|
frt_cw_add_file(cw, file_name);
|
6047
5969
|
}
|
6048
5970
|
}
|
@@ -6510,7 +6432,7 @@ static void iw_cp_norms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
|
6510
6432
|
char file_name_out[FRT_SEGMENT_NAME_MAX_LENGTH];
|
6511
6433
|
|
6512
6434
|
for (i = 0; i < field_cnt; i++) {
|
6513
|
-
if (
|
6435
|
+
if (bits_has_norms(fis->fields[i]->bits)
|
6514
6436
|
&& si_norm_file_name(sr->si, file_name_in, i)) {
|
6515
6437
|
FrtStore *store = (sr->si->use_compound_file
|
6516
6438
|
&& sr->si->norm_gens[i] == 0) ? sr->cfs_store
|
@@ -6567,8 +6489,7 @@ static void iw_add_segment(FrtIndexWriter *iw, FrtSegmentReader *sr)
|
|
6567
6489
|
FrtFieldInfo *fi = sub_fis->fields[j];
|
6568
6490
|
FrtFieldInfo *new_fi = frt_fis_get_field(fis, fi->name);
|
6569
6491
|
if (NULL == new_fi) {
|
6570
|
-
new_fi = frt_fi_new(fi->name,
|
6571
|
-
new_fi->bits = fi->bits;
|
6492
|
+
new_fi = frt_fi_new(fi->name, fi->bits);
|
6572
6493
|
frt_fis_add_field(fis, new_fi);
|
6573
6494
|
}
|
6574
6495
|
new_fi->bits |= fi->bits;
|
@@ -62,27 +62,7 @@ extern FrtHash *frt_co_hash_create();
|
|
62
62
|
*
|
63
63
|
****************************************************************************/
|
64
64
|
|
65
|
-
|
66
|
-
FRT_STORE_NO = 0,
|
67
|
-
FRT_STORE_YES = 1,
|
68
|
-
} FrtStoreValue;
|
69
|
-
|
70
|
-
typedef enum {
|
71
|
-
FRT_INDEX_NO = 0,
|
72
|
-
FRT_INDEX_UNTOKENIZED = 1,
|
73
|
-
FRT_INDEX_YES = 3,
|
74
|
-
FRT_INDEX_UNTOKENIZED_OMIT_NORMS = 5,
|
75
|
-
FRT_INDEX_YES_OMIT_NORMS = 7
|
76
|
-
} FrtIndexValue;
|
77
|
-
|
78
|
-
typedef enum {
|
79
|
-
FRT_TERM_VECTOR_NO = 0,
|
80
|
-
FRT_TERM_VECTOR_YES = 1,
|
81
|
-
FRT_TERM_VECTOR_WITH_POSITIONS = 3,
|
82
|
-
FRT_TERM_VECTOR_WITH_OFFSETS = 5,
|
83
|
-
FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS = 7
|
84
|
-
} FrtTermVectorValue;
|
85
|
-
|
65
|
+
#define FRT_FI_DEFAULTS_BM FRT_FI_IS_STORED_BM | FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM | FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM | FRT_FI_STORE_OFFSETS_BM
|
86
66
|
#define FRT_FI_IS_STORED_BM 0x001
|
87
67
|
#define FRT_FI_IS_COMPRESSED_BM 0x002
|
88
68
|
#define FRT_FI_IS_INDEXED_BM 0x004
|
@@ -105,24 +85,24 @@ typedef struct FrtFieldInfo {
|
|
105
85
|
} FrtFieldInfo;
|
106
86
|
|
107
87
|
extern FrtFieldInfo *frt_fi_alloc();
|
108
|
-
extern FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name,
|
109
|
-
extern FrtFieldInfo *frt_fi_new(ID name,
|
88
|
+
extern FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, unsigned int bits);
|
89
|
+
extern FrtFieldInfo *frt_fi_new(ID name, unsigned int bits);
|
110
90
|
extern char *frt_fi_to_s(FrtFieldInfo *fi);
|
111
91
|
extern void frt_fi_deref(FrtFieldInfo *fi);
|
112
92
|
|
113
|
-
#define
|
114
|
-
#define
|
115
|
-
#define
|
116
|
-
#define
|
117
|
-
#define
|
118
|
-
#define
|
119
|
-
#define
|
120
|
-
#define
|
121
|
-
#define
|
122
|
-
#define
|
123
|
-
#define
|
124
|
-
#define
|
125
|
-
((
|
93
|
+
#define bits_is_stored(bits) ((bits & FRT_FI_IS_STORED_BM) != 0)
|
94
|
+
#define bits_is_compressed(bits) ((bits & FRT_FI_IS_COMPRESSED_BM) != 0)
|
95
|
+
#define bits_is_compressed_brotli(bits) ((bits & FRT_FI_COMPRESSION_BROTLI_BM) != 0)
|
96
|
+
#define bits_is_compressed_bz2(bits) ((bits & FRT_FI_COMPRESSION_BZ2_BM) != 0)
|
97
|
+
#define bits_is_compressed_lz4(bits) ((bits & FRT_FI_COMPRESSION_LZ4_BM) != 0)
|
98
|
+
#define bits_is_indexed(bits) ((bits & FRT_FI_IS_INDEXED_BM) != 0)
|
99
|
+
#define bits_is_tokenized(bits) ((bits & FRT_FI_IS_TOKENIZED_BM) != 0)
|
100
|
+
#define bits_omit_norms(bits) ((bits & FRT_FI_OMIT_NORMS_BM) != 0)
|
101
|
+
#define bits_store_term_vector(bits) ((bits & FRT_FI_STORE_TERM_VECTOR_BM) != 0)
|
102
|
+
#define bits_store_positions(bits) ((bits & FRT_FI_STORE_POSITIONS_BM) != 0)
|
103
|
+
#define bits_store_offsets(bits) ((bits & FRT_FI_STORE_OFFSETS_BM) != 0)
|
104
|
+
#define bits_has_norms(bits)\
|
105
|
+
((bits & (FRT_FI_OMIT_NORMS_BM|FRT_FI_IS_INDEXED_BM)) == FRT_FI_IS_INDEXED_BM)
|
126
106
|
|
127
107
|
/****************************************************************************
|
128
108
|
*
|
@@ -133,21 +113,18 @@ extern void frt_fi_deref(FrtFieldInfo *fi);
|
|
133
113
|
#define FIELD_INFOS_INIT_CAPA 4
|
134
114
|
/* carry changes over to dummy_fis in test/test_segments.c */
|
135
115
|
typedef struct FrtFieldInfos {
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
int capa;
|
142
|
-
FrtFieldInfo **fields;
|
143
|
-
FrtHash *field_dict;
|
116
|
+
unsigned int bits;
|
117
|
+
int size;
|
118
|
+
int capa;
|
119
|
+
FrtFieldInfo **fields;
|
120
|
+
FrtHash *field_dict;
|
144
121
|
_Atomic unsigned int ref_cnt;
|
145
|
-
VALUE
|
122
|
+
VALUE rfis;
|
146
123
|
} FrtFieldInfos;
|
147
124
|
|
148
125
|
FrtFieldInfos *frt_fis_alloc();
|
149
|
-
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis,
|
150
|
-
FrtFieldInfos *frt_fis_new(
|
126
|
+
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, unsigned int bits);
|
127
|
+
FrtFieldInfos *frt_fis_new(unsigned int bits);
|
151
128
|
extern FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi);
|
152
129
|
extern FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name);
|
153
130
|
extern int frt_fis_get_field_num(FrtFieldInfos *fis, ID name);
|
@@ -151,8 +151,8 @@ static FrtMatchVector *spanq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, Fr
|
|
151
151
|
FrtMatchVector *full_mv = frt_matchv_new();
|
152
152
|
FrtHashSet *terms = SpQ(self)->get_terms(self);
|
153
153
|
/* FIXME What is going on here? Need to document this! */
|
154
|
-
ir->fis = frt_fis_new(
|
155
|
-
frt_fis_add_field(ir->fis, frt_fi_new(tv->field,
|
154
|
+
ir->fis = frt_fis_new(0);
|
155
|
+
frt_fis_add_field(ir->fis, frt_fi_new(tv->field, 0));
|
156
156
|
ir->store = (FrtStore *)tv;
|
157
157
|
FRT_REF((FrtStore *)tv);
|
158
158
|
ir->term_positions = &spanq_ir_term_positions;
|
@@ -6,7 +6,7 @@
|
|
6
6
|
extern rb_encoding *utf8_encoding;
|
7
7
|
|
8
8
|
static FrtFieldInfos *create_fis(void) {
|
9
|
-
FrtFieldInfos *fis = frt_fis_new(
|
9
|
+
FrtFieldInfos *fis = frt_fis_new(0 | FRT_FI_IS_STORED_BM | FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM | FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM | FRT_FI_STORE_OFFSETS_BM);
|
10
10
|
return fis;
|
11
11
|
}
|
12
12
|
|