isomorfeus-ferret 0.14.3 → 0.14.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/frb_field_info.c +65 -55
- data/ext/isomorfeus_ferret_ext/frb_index.c +21 -19
- data/ext/isomorfeus_ferret_ext/frt_ind.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_index.c +66 -145
- data/ext/isomorfeus_ferret_ext/frt_index.h +24 -47
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_1710.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_fields.c +39 -39
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_filter.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_index.c +36 -36
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_q_span.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_search.c +4 -4
- data/ext/isomorfeus_ferret_ext/test_segments.c +1 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +6 -6
- data/ext/isomorfeus_ferret_ext/test_threading.c +2 -2
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +2 -2
@@ -44,7 +44,7 @@ const FrtConfig frt_default_config = {
|
|
44
44
|
static void ste_reset(FrtTermEnum *te);
|
45
45
|
static char *ste_next(FrtTermEnum *te);
|
46
46
|
|
47
|
-
#define FORMAT
|
47
|
+
#define FORMAT 15
|
48
48
|
#define SEGMENTS_GEN_FILE_NAME "segments"
|
49
49
|
#define MAX_EXT_LEN 10
|
50
50
|
#define FRT_COMPRESSION_BUFFER_SIZE 16348
|
@@ -213,78 +213,11 @@ FrtHash *frt_co_hash_create(void) {
|
|
213
213
|
*
|
214
214
|
****************************************************************************/
|
215
215
|
|
216
|
-
static void
|
217
|
-
|
218
|
-
case FRT_STORE_NO:
|
219
|
-
break;
|
220
|
-
case FRT_STORE_YES:
|
221
|
-
fi->bits |= FRT_FI_IS_STORED_BM;
|
222
|
-
break;
|
223
|
-
}
|
224
|
-
}
|
225
|
-
|
226
|
-
static void fi_set_compression(FrtFieldInfo *fi, FrtCompressionType compression) {
|
227
|
-
switch (compression) {
|
228
|
-
case FRT_COMPRESSION_NONE:
|
229
|
-
break;
|
230
|
-
case FRT_COMPRESSION_BROTLI:
|
231
|
-
fi->bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_BROTLI_BM;
|
232
|
-
break;
|
233
|
-
case FRT_COMPRESSION_BZ2:
|
234
|
-
fi->bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_BZ2_BM;
|
235
|
-
break;
|
236
|
-
case FRT_COMPRESSION_LZ4:
|
237
|
-
fi->bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_LZ4_BM;
|
238
|
-
break;
|
239
|
-
}
|
240
|
-
}
|
241
|
-
|
242
|
-
static void fi_set_index(FrtFieldInfo *fi, FrtIndexValue index) {
|
243
|
-
switch (index) {
|
244
|
-
case FRT_INDEX_NO:
|
245
|
-
break;
|
246
|
-
case FRT_INDEX_YES:
|
247
|
-
fi->bits |= FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM;
|
248
|
-
break;
|
249
|
-
case FRT_INDEX_UNTOKENIZED:
|
250
|
-
fi->bits |= FRT_FI_IS_INDEXED_BM;
|
251
|
-
break;
|
252
|
-
case FRT_INDEX_YES_OMIT_NORMS:
|
253
|
-
fi->bits |= FRT_FI_OMIT_NORMS_BM | FRT_FI_IS_INDEXED_BM |
|
254
|
-
FRT_FI_IS_TOKENIZED_BM;
|
255
|
-
break;
|
256
|
-
case FRT_INDEX_UNTOKENIZED_OMIT_NORMS:
|
257
|
-
fi->bits |= FRT_FI_OMIT_NORMS_BM | FRT_FI_IS_INDEXED_BM;
|
258
|
-
break;
|
259
|
-
}
|
260
|
-
}
|
261
|
-
|
262
|
-
static void fi_set_term_vector(FrtFieldInfo *fi, FrtTermVectorValue term_vector) {
|
263
|
-
switch (term_vector) {
|
264
|
-
case FRT_TERM_VECTOR_NO:
|
265
|
-
break;
|
266
|
-
case FRT_TERM_VECTOR_YES:
|
267
|
-
fi->bits |= FRT_FI_STORE_TERM_VECTOR_BM;
|
268
|
-
break;
|
269
|
-
case FRT_TERM_VECTOR_WITH_POSITIONS:
|
270
|
-
fi->bits |= FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM;
|
271
|
-
break;
|
272
|
-
case FRT_TERM_VECTOR_WITH_OFFSETS:
|
273
|
-
fi->bits |= FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_OFFSETS_BM;
|
274
|
-
break;
|
275
|
-
case FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS:
|
276
|
-
fi->bits |= FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM |
|
277
|
-
FRT_FI_STORE_OFFSETS_BM;
|
278
|
-
break;
|
279
|
-
}
|
280
|
-
}
|
281
|
-
|
282
|
-
static void fi_check_params(FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector) {
|
283
|
-
(void)store;
|
284
|
-
if ((index == FRT_INDEX_NO) && (term_vector != FRT_TERM_VECTOR_NO)) {
|
216
|
+
static void fi_check_params(unsigned int bits) {
|
217
|
+
if (!bits_is_indexed(bits) && bits_store_term_vector(bits)) {
|
285
218
|
FRT_RAISE(FRT_ARG_ERROR, "You can't store the term vectors of an unindexed field.");
|
286
219
|
}
|
287
|
-
if ((
|
220
|
+
if (bits_is_compressed(bits) && !bits_is_stored(bits)) {
|
288
221
|
FRT_RAISE(FRT_ARG_ERROR, "Field must be stored for compression to be useful.");
|
289
222
|
}
|
290
223
|
}
|
@@ -293,25 +226,21 @@ FrtFieldInfo *frt_fi_alloc(void) {
|
|
293
226
|
return FRT_ALLOC(FrtFieldInfo);
|
294
227
|
}
|
295
228
|
|
296
|
-
FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name,
|
229
|
+
FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, unsigned int bits) {
|
297
230
|
assert(NULL != name);
|
298
|
-
fi_check_params(
|
231
|
+
fi_check_params(bits);
|
299
232
|
fi->name = name;
|
300
233
|
fi->boost = 1.0f;
|
301
|
-
fi->bits =
|
302
|
-
fi_set_store(fi, store);
|
303
|
-
fi_set_compression(fi, compression);
|
304
|
-
fi_set_index(fi, index);
|
305
|
-
fi_set_term_vector(fi, term_vector);
|
234
|
+
fi->bits = bits;
|
306
235
|
fi->number = 0;
|
307
236
|
fi->ref_cnt = 1;
|
308
237
|
fi->rfi = Qnil;
|
309
238
|
return fi;
|
310
239
|
}
|
311
240
|
|
312
|
-
FrtFieldInfo *frt_fi_new(ID name,
|
241
|
+
FrtFieldInfo *frt_fi_new(ID name, unsigned int bits) {
|
313
242
|
FrtFieldInfo *fi = frt_fi_alloc();
|
314
|
-
return frt_fi_init(fi, name,
|
243
|
+
return frt_fi_init(fi, name, bits);
|
315
244
|
}
|
316
245
|
|
317
246
|
void frt_fi_deref(FrtFieldInfo *fi) {
|
@@ -319,12 +248,12 @@ void frt_fi_deref(FrtFieldInfo *fi) {
|
|
319
248
|
}
|
320
249
|
|
321
250
|
FrtCompressionType frt_fi_get_compression(FrtFieldInfo *fi) {
|
322
|
-
if (
|
323
|
-
if (
|
251
|
+
if (bits_is_compressed(fi->bits)) {
|
252
|
+
if (bits_is_compressed_brotli(fi->bits)) {
|
324
253
|
return FRT_COMPRESSION_BROTLI;
|
325
|
-
} else if (
|
254
|
+
} else if (bits_is_compressed_bz2(fi->bits)) {
|
326
255
|
return FRT_COMPRESSION_BZ2;
|
327
|
-
} else if (
|
256
|
+
} else if (bits_is_compressed_lz4(fi->bits)) {
|
328
257
|
return FRT_COMPRESSION_LZ4;
|
329
258
|
} else {
|
330
259
|
return FRT_COMPRESSION_BROTLI;
|
@@ -340,14 +269,14 @@ char *frt_fi_to_s(FrtFieldInfo *fi)
|
|
340
269
|
char *str = FRT_ALLOC_N(char, strlen(fi_name) + 200);
|
341
270
|
char *s = str;
|
342
271
|
s += sprintf(str, "[\"%s\":(%s%s%s%s%s%s%s%s", fi_name,
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
272
|
+
bits_is_stored(fi->bits) ? "is_stored, " : "",
|
273
|
+
bits_is_compressed(fi->bits) ? "is_compressed, " : "",
|
274
|
+
bits_is_indexed(fi->bits) ? "is_indexed, " : "",
|
275
|
+
bits_is_tokenized(fi->bits) ? "is_tokenized, " : "",
|
276
|
+
bits_omit_norms(fi->bits) ? "omit_norms, " : "",
|
277
|
+
bits_store_term_vector(fi->bits) ? "store_term_vector, " : "",
|
278
|
+
bits_store_positions(fi->bits) ? "store_positions, " : "",
|
279
|
+
bits_store_offsets(fi->bits) ? "store_offsets, " : "");
|
351
280
|
s -= 2;
|
352
281
|
if (*s != ',') {
|
353
282
|
s += 2;
|
@@ -367,24 +296,21 @@ FrtFieldInfos *frt_fis_alloc(void) {
|
|
367
296
|
return FRT_ALLOC(FrtFieldInfos);
|
368
297
|
}
|
369
298
|
|
370
|
-
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis,
|
371
|
-
fi_check_params(
|
299
|
+
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, unsigned int bits) {
|
300
|
+
fi_check_params(bits);
|
372
301
|
fis->field_dict = frt_h_new_ptr((frt_free_ft)&frt_fi_deref);
|
373
302
|
fis->size = 0;
|
374
303
|
fis->capa = FIELD_INFOS_INIT_CAPA;
|
375
304
|
fis->fields = FRT_ALLOC_N(FrtFieldInfo *, fis->capa);
|
376
|
-
fis->
|
377
|
-
fis->compression = compression;
|
378
|
-
fis->index = index;
|
379
|
-
fis->term_vector = term_vector;
|
305
|
+
fis->bits = bits;
|
380
306
|
fis->ref_cnt = 1;
|
381
307
|
fis->rfis = Qnil;
|
382
308
|
return fis;
|
383
309
|
}
|
384
310
|
|
385
|
-
FrtFieldInfos *frt_fis_new(
|
311
|
+
FrtFieldInfos *frt_fis_new(unsigned int bits) {
|
386
312
|
FrtFieldInfos *fis = frt_fis_alloc();
|
387
|
-
return frt_fis_init(fis,
|
313
|
+
return frt_fis_init(fis, bits);
|
388
314
|
}
|
389
315
|
|
390
316
|
FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi) {
|
@@ -415,7 +341,7 @@ int frt_fis_get_field_num(FrtFieldInfos *fis, ID name) {
|
|
415
341
|
FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name) {
|
416
342
|
FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
|
417
343
|
if (!fi) {
|
418
|
-
fi = (FrtFieldInfo*)frt_fi_new(name, fis->
|
344
|
+
fi = (FrtFieldInfo*)frt_fi_new(name, fis->bits);
|
419
345
|
frt_fis_add_field(fis, fi);
|
420
346
|
}
|
421
347
|
return fi;
|
@@ -427,14 +353,10 @@ FrtFieldInfos *frt_fis_read(FrtInStream *is)
|
|
427
353
|
char *field_name;
|
428
354
|
FRT_TRY
|
429
355
|
do {
|
430
|
-
FrtTermVectorValue term_vector_val;
|
431
356
|
volatile int i;
|
432
357
|
union { frt_u32 i; float f; } tmp;
|
433
358
|
FrtFieldInfo *volatile fi;
|
434
|
-
|
435
|
-
FrtIndexValue index_val = (FrtIndexValue)frt_is_read_vint(is);
|
436
|
-
term_vector_val = (FrtTermVectorValue)frt_is_read_vint(is);
|
437
|
-
fis = frt_fis_new(store_val, FRT_COMPRESSION_NONE, index_val, term_vector_val); // TODO compression, read from store?
|
359
|
+
fis = frt_fis_new(frt_is_read_vint(is));
|
438
360
|
for (i = frt_is_read_vint(is); i > 0; i--) {
|
439
361
|
fi = FRT_ALLOC_AND_ZERO(FrtFieldInfo);
|
440
362
|
FRT_TRY
|
@@ -464,9 +386,7 @@ void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os)
|
|
464
386
|
FrtFieldInfo *fi;
|
465
387
|
const int fis_size = fis->size;
|
466
388
|
|
467
|
-
frt_os_write_vint(os, fis->
|
468
|
-
frt_os_write_vint(os, fis->index);
|
469
|
-
frt_os_write_vint(os, fis->term_vector);
|
389
|
+
frt_os_write_vint(os, fis->bits);
|
470
390
|
frt_os_write_vint(os, fis->size);
|
471
391
|
|
472
392
|
for (i = 0; i < fis_size; i++) {
|
@@ -536,9 +456,9 @@ char *frt_fis_to_s(FrtFieldInfos *fis)
|
|
536
456
|
" index: %s\n"
|
537
457
|
" term_vector: %s\n"
|
538
458
|
"fields:\n",
|
539
|
-
store_str[fis->
|
540
|
-
index_str[fis->
|
541
|
-
term_vector_str[fis->
|
459
|
+
store_str[fis->bits & 0x3],
|
460
|
+
index_str[(fis->bits >> 2) & 0x7],
|
461
|
+
term_vector_str[(fis->bits >> 5) & 0x7]);
|
542
462
|
for (i = 0; i < fis_size; i++) {
|
543
463
|
fi = fis->fields[i];
|
544
464
|
pos += sprintf(buf + pos,
|
@@ -568,7 +488,7 @@ static bool fis_has_vectors(FrtFieldInfos *fis)
|
|
568
488
|
const int fis_size = fis->size;
|
569
489
|
|
570
490
|
for (i = 0; i < fis_size; i++) {
|
571
|
-
if (
|
491
|
+
if (bits_store_term_vector(fis->fields[i]->bits)) {
|
572
492
|
return true;
|
573
493
|
}
|
574
494
|
}
|
@@ -1047,6 +967,7 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReade
|
|
1047
967
|
{
|
1048
968
|
int seg_cnt;
|
1049
969
|
int i;
|
970
|
+
frt_u32 format = 0;
|
1050
971
|
volatile bool success = false;
|
1051
972
|
char seg_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
1052
973
|
FrtInStream *volatile is = NULL;
|
@@ -1058,7 +979,9 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReade
|
|
1058
979
|
sis->store = store;
|
1059
980
|
FRT_REF(store);
|
1060
981
|
sis->generation = fsf->generation;
|
1061
|
-
|
982
|
+
format = frt_is_read_u32(is);
|
983
|
+
if (format == FORMAT) sis->format = format;
|
984
|
+
else FRT_RAISE(FRT_EXCEPTION, "Wrong index format, required format '%u', format of given index '%u'", FORMAT, format);
|
1062
985
|
sis->version = frt_is_read_u64(is);
|
1063
986
|
sis->counter = frt_is_read_u64(is);
|
1064
987
|
seg_cnt = frt_is_read_vint(is);
|
@@ -1073,9 +996,7 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReade
|
|
1073
996
|
success = true;
|
1074
997
|
FRT_XFINALLY
|
1075
998
|
if (is) frt_is_close(is);
|
1076
|
-
if (!success)
|
1077
|
-
frt_sis_destroy(sis);
|
1078
|
-
}
|
999
|
+
if (!success) frt_sis_destroy(sis);
|
1079
1000
|
FRT_XENDTRY
|
1080
1001
|
fsf->ret.sis = sis;
|
1081
1002
|
}
|
@@ -1126,16 +1047,17 @@ void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter)
|
|
1126
1047
|
static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_)
|
1127
1048
|
{
|
1128
1049
|
FrtInStream *is;
|
1129
|
-
|
1050
|
+
frt_u32 format = 0;
|
1051
|
+
frt_u64 version = 0;
|
1130
1052
|
char seg_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
1131
1053
|
|
1132
1054
|
segfn_for_generation(seg_file_name, (frt_u64)fsf->generation);
|
1133
1055
|
is = store->open_input(store, seg_file_name);
|
1134
|
-
version = 0;
|
1135
1056
|
|
1136
1057
|
FRT_TRY
|
1137
|
-
frt_is_read_u32(is); // format
|
1138
|
-
version = frt_is_read_u64(is);
|
1058
|
+
format = frt_is_read_u32(is); // format
|
1059
|
+
if (format == FORMAT) version = frt_is_read_u64(is);
|
1060
|
+
else FRT_RAISE(FRT_EXCEPTION, "Wrong index format, required format '%u', format of given index '%u'", FORMAT, format);
|
1139
1061
|
FRT_XFINALLY
|
1140
1062
|
frt_is_close(is);
|
1141
1063
|
FRT_XENDTRY
|
@@ -1671,8 +1593,8 @@ static FrtTermVector *frt_fr_read_term_vector(FrtFieldsReader *fr, int field_num
|
|
1671
1593
|
|
1672
1594
|
if (num_terms > 0) {
|
1673
1595
|
int i, j, delta_start, delta_len, total_len, freq;
|
1674
|
-
int store_positions =
|
1675
|
-
int store_offsets =
|
1596
|
+
int store_positions = bits_store_positions(fi->bits);
|
1597
|
+
int store_offsets = bits_store_offsets(fi->bits);
|
1676
1598
|
frt_uchar buffer[FRT_MAX_WORD_SIZE];
|
1677
1599
|
FrtTVTerm *term;
|
1678
1600
|
|
@@ -1975,7 +1897,7 @@ void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc) {
|
|
1975
1897
|
|
1976
1898
|
for (i = 0; i < doc_size; i++) {
|
1977
1899
|
df = doc->fields[i];
|
1978
|
-
if (
|
1900
|
+
if (bits_is_stored(frt_fis_get_or_add_field(fw->fis, df->name)->bits)) {
|
1979
1901
|
stored_cnt++;
|
1980
1902
|
}
|
1981
1903
|
}
|
@@ -1989,12 +1911,12 @@ void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc) {
|
|
1989
1911
|
for (i = 0; i < doc_size; i++) {
|
1990
1912
|
df = doc->fields[i];
|
1991
1913
|
fi = frt_fis_get_field(fw->fis, df->name);
|
1992
|
-
if (
|
1914
|
+
if (bits_is_stored(fi->bits)) {
|
1993
1915
|
const int df_size = df->size;
|
1994
1916
|
frt_os_write_vint(fdt_out, fi->number);
|
1995
1917
|
frt_os_write_vint(fdt_out, df_size);
|
1996
1918
|
|
1997
|
-
if (
|
1919
|
+
if (bits_is_compressed(fi->bits)) {
|
1998
1920
|
compression = frt_fi_get_compression(fi);
|
1999
1921
|
for (j = 0; j < df_size; j++) {
|
2000
1922
|
const int length = df->lengths[j];
|
@@ -2048,7 +1970,7 @@ void frt_fw_add_postings(FrtFieldsWriter *fw,
|
|
2048
1970
|
FrtPosting *posting;
|
2049
1971
|
FrtOccurence *occ;
|
2050
1972
|
FrtFieldInfo *fi = fw->fis->fields[field_num];
|
2051
|
-
int store_positions =
|
1973
|
+
int store_positions = bits_store_positions(fi->bits);
|
2052
1974
|
|
2053
1975
|
frt_ary_grow(fw->tv_fields);
|
2054
1976
|
frt_ary_last(fw->tv_fields).field_num = field_num;
|
@@ -2080,7 +2002,7 @@ void frt_fw_add_postings(FrtFieldsWriter *fw,
|
|
2080
2002
|
|
2081
2003
|
}
|
2082
2004
|
|
2083
|
-
if (
|
2005
|
+
if (bits_store_offsets(fi->bits)) {
|
2084
2006
|
/* use delta encoding for offsets */
|
2085
2007
|
frt_i64 last_end = 0;
|
2086
2008
|
frt_os_write_vint(fdt_out, offset_count); /* write shared prefix length */
|
@@ -4042,8 +3964,8 @@ void frt_ir_commit(FrtIndexReader *ir)
|
|
4042
3964
|
|
4043
3965
|
void frt_ir_close(FrtIndexReader *ir) {
|
4044
3966
|
if (ir->ref_cnt == 0) {
|
4045
|
-
fprintf(stderr, "
|
4046
|
-
|
3967
|
+
fprintf(stderr, "Warning: IndexReader ref_cnt to low\n");
|
3968
|
+
return;
|
4047
3969
|
}
|
4048
3970
|
|
4049
3971
|
if (FRT_DEREF(ir) == 0) {
|
@@ -4285,7 +4207,7 @@ static void sr_commit_i(FrtIndexReader *ir)
|
|
4285
4207
|
FrtFieldInfo *fi;
|
4286
4208
|
for (i = field_cnt - 1; i >= 0; i--) {
|
4287
4209
|
fi = ir->fis->fields[i];
|
4288
|
-
if (
|
4210
|
+
if (bits_is_indexed(fi->bits)) {
|
4289
4211
|
Norm *norm = (Norm *)frt_h_get_int(SR(ir)->norms, fi->number);
|
4290
4212
|
if (norm && norm->is_dirty) {
|
4291
4213
|
norm_rewrite(norm, ir->store, ir->deleter, SR(ir)->si,
|
@@ -4415,7 +4337,7 @@ static FrtTermVector *sr_term_vector(FrtIndexReader *ir, int doc_num, ID field)
|
|
4415
4337
|
FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(ir->fis->field_dict, (void *)field);
|
4416
4338
|
FrtFieldsReader *fr;
|
4417
4339
|
|
4418
|
-
if (!fi || !
|
4340
|
+
if (!fi || !bits_store_term_vector(fi->bits) || !SR(ir)->fr || !(fr = sr_fr(SR(ir)))) {
|
4419
4341
|
return NULL;
|
4420
4342
|
}
|
4421
4343
|
|
@@ -4903,7 +4825,7 @@ FrtIndexReader *frt_mr_open(FrtIndexReader *ir, FrtIndexReader **sub_readers, co
|
|
4903
4825
|
ir = (FrtIndexReader *)frt_mr_init((FrtMultiReader *)ir, sub_readers, r_cnt);
|
4904
4826
|
FrtMultiReader *mr = MR(ir);
|
4905
4827
|
/* defaults don't matter, this is just for reading fields, not adding */
|
4906
|
-
FrtFieldInfos *fis = frt_fis_new(
|
4828
|
+
FrtFieldInfos *fis = frt_fis_new(0);
|
4907
4829
|
int i, j;
|
4908
4830
|
bool need_field_map = false;
|
4909
4831
|
|
@@ -5082,10 +5004,10 @@ int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2)
|
|
5082
5004
|
static FrtFieldInverter *fld_inv_new(FrtDocWriter *dw, FrtFieldInfo *fi)
|
5083
5005
|
{
|
5084
5006
|
FrtFieldInverter *fld_inv = FRT_MP_ALLOC(dw->mp, FrtFieldInverter);
|
5085
|
-
fld_inv->is_tokenized =
|
5086
|
-
fld_inv->store_term_vector =
|
5087
|
-
fld_inv->store_offsets =
|
5088
|
-
if ((fld_inv->has_norms =
|
5007
|
+
fld_inv->is_tokenized = bits_is_tokenized(fi->bits);
|
5008
|
+
fld_inv->store_term_vector = bits_store_term_vector(fi->bits);
|
5009
|
+
fld_inv->store_offsets = bits_store_offsets(fi->bits);
|
5010
|
+
if ((fld_inv->has_norms = bits_has_norms(fi->bits)) == true) {
|
5089
5011
|
fld_inv->norms = FRT_MP_ALLOC_AND_ZERO_N(dw->mp, frt_uchar,
|
5090
5012
|
dw->max_buffered_docs);
|
5091
5013
|
}
|
@@ -5235,11 +5157,11 @@ static void dw_flush(FrtDocWriter *dw)
|
|
5235
5157
|
|
5236
5158
|
for (i = 0; i < fields_count; i++) {
|
5237
5159
|
fi = fis->fields[i];
|
5238
|
-
if (!
|
5160
|
+
if (!bits_is_indexed(fi->bits) || NULL ==
|
5239
5161
|
(fld_inv = (FrtFieldInverter*)frt_h_get_int(dw->fields, fi->number))) {
|
5240
5162
|
continue;
|
5241
5163
|
}
|
5242
|
-
if (!
|
5164
|
+
if (!bits_omit_norms(fi->bits)) {
|
5243
5165
|
dw_write_norms(dw, fld_inv);
|
5244
5166
|
}
|
5245
5167
|
|
@@ -5499,7 +5421,7 @@ void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc) {
|
|
5499
5421
|
for (i = 0; i < doc_size; i++) {
|
5500
5422
|
df = doc->fields[i];
|
5501
5423
|
fi = frt_fis_get_field(dw->fis, df->name);
|
5502
|
-
if (!
|
5424
|
+
if (!bits_is_indexed(fi->bits)) {
|
5503
5425
|
continue;
|
5504
5426
|
}
|
5505
5427
|
fld_inv = frt_dw_get_fld_inv(dw, fi);
|
@@ -5941,7 +5863,7 @@ static void sm_merge_norms(SegmentMerger *sm)
|
|
5941
5863
|
const int seg_cnt = sm->seg_cnt;
|
5942
5864
|
for (i = sm->fis->size - 1; i >= 0; i--) {
|
5943
5865
|
fi = sm->fis->fields[i];
|
5944
|
-
if (
|
5866
|
+
if (bits_has_norms(fi->bits)) {
|
5945
5867
|
si = sm->si;
|
5946
5868
|
frt_si_advance_norm_gen(si, i);
|
5947
5869
|
si_norm_file_name(si, file_name, i);
|
@@ -6042,7 +5964,7 @@ static void iw_create_compound_file(FrtStore *store, FrtFieldInfos *fis, FrtSegm
|
|
6042
5964
|
|
6043
5965
|
/* Field norm file_names */
|
6044
5966
|
for (i = fis->size - 1; i >= 0; i--) {
|
6045
|
-
if (
|
5967
|
+
if (bits_has_norms(fis->fields[i]->bits) && si_norm_file_name(si, file_name, i)) {
|
6046
5968
|
frt_cw_add_file(cw, file_name);
|
6047
5969
|
}
|
6048
5970
|
}
|
@@ -6510,7 +6432,7 @@ static void iw_cp_norms(FrtIndexWriter *iw, FrtSegmentReader *sr,
|
|
6510
6432
|
char file_name_out[FRT_SEGMENT_NAME_MAX_LENGTH];
|
6511
6433
|
|
6512
6434
|
for (i = 0; i < field_cnt; i++) {
|
6513
|
-
if (
|
6435
|
+
if (bits_has_norms(fis->fields[i]->bits)
|
6514
6436
|
&& si_norm_file_name(sr->si, file_name_in, i)) {
|
6515
6437
|
FrtStore *store = (sr->si->use_compound_file
|
6516
6438
|
&& sr->si->norm_gens[i] == 0) ? sr->cfs_store
|
@@ -6567,8 +6489,7 @@ static void iw_add_segment(FrtIndexWriter *iw, FrtSegmentReader *sr)
|
|
6567
6489
|
FrtFieldInfo *fi = sub_fis->fields[j];
|
6568
6490
|
FrtFieldInfo *new_fi = frt_fis_get_field(fis, fi->name);
|
6569
6491
|
if (NULL == new_fi) {
|
6570
|
-
new_fi = frt_fi_new(fi->name,
|
6571
|
-
new_fi->bits = fi->bits;
|
6492
|
+
new_fi = frt_fi_new(fi->name, fi->bits);
|
6572
6493
|
frt_fis_add_field(fis, new_fi);
|
6573
6494
|
}
|
6574
6495
|
new_fi->bits |= fi->bits;
|
@@ -62,27 +62,7 @@ extern FrtHash *frt_co_hash_create();
|
|
62
62
|
*
|
63
63
|
****************************************************************************/
|
64
64
|
|
65
|
-
|
66
|
-
FRT_STORE_NO = 0,
|
67
|
-
FRT_STORE_YES = 1,
|
68
|
-
} FrtStoreValue;
|
69
|
-
|
70
|
-
typedef enum {
|
71
|
-
FRT_INDEX_NO = 0,
|
72
|
-
FRT_INDEX_UNTOKENIZED = 1,
|
73
|
-
FRT_INDEX_YES = 3,
|
74
|
-
FRT_INDEX_UNTOKENIZED_OMIT_NORMS = 5,
|
75
|
-
FRT_INDEX_YES_OMIT_NORMS = 7
|
76
|
-
} FrtIndexValue;
|
77
|
-
|
78
|
-
typedef enum {
|
79
|
-
FRT_TERM_VECTOR_NO = 0,
|
80
|
-
FRT_TERM_VECTOR_YES = 1,
|
81
|
-
FRT_TERM_VECTOR_WITH_POSITIONS = 3,
|
82
|
-
FRT_TERM_VECTOR_WITH_OFFSETS = 5,
|
83
|
-
FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS = 7
|
84
|
-
} FrtTermVectorValue;
|
85
|
-
|
65
|
+
#define FRT_FI_DEFAULTS_BM FRT_FI_IS_STORED_BM | FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM | FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM | FRT_FI_STORE_OFFSETS_BM
|
86
66
|
#define FRT_FI_IS_STORED_BM 0x001
|
87
67
|
#define FRT_FI_IS_COMPRESSED_BM 0x002
|
88
68
|
#define FRT_FI_IS_INDEXED_BM 0x004
|
@@ -105,24 +85,24 @@ typedef struct FrtFieldInfo {
|
|
105
85
|
} FrtFieldInfo;
|
106
86
|
|
107
87
|
extern FrtFieldInfo *frt_fi_alloc();
|
108
|
-
extern FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name,
|
109
|
-
extern FrtFieldInfo *frt_fi_new(ID name,
|
88
|
+
extern FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, unsigned int bits);
|
89
|
+
extern FrtFieldInfo *frt_fi_new(ID name, unsigned int bits);
|
110
90
|
extern char *frt_fi_to_s(FrtFieldInfo *fi);
|
111
91
|
extern void frt_fi_deref(FrtFieldInfo *fi);
|
112
92
|
|
113
|
-
#define
|
114
|
-
#define
|
115
|
-
#define
|
116
|
-
#define
|
117
|
-
#define
|
118
|
-
#define
|
119
|
-
#define
|
120
|
-
#define
|
121
|
-
#define
|
122
|
-
#define
|
123
|
-
#define
|
124
|
-
#define
|
125
|
-
((
|
93
|
+
#define bits_is_stored(bits) ((bits & FRT_FI_IS_STORED_BM) != 0)
|
94
|
+
#define bits_is_compressed(bits) ((bits & FRT_FI_IS_COMPRESSED_BM) != 0)
|
95
|
+
#define bits_is_compressed_brotli(bits) ((bits & FRT_FI_COMPRESSION_BROTLI_BM) != 0)
|
96
|
+
#define bits_is_compressed_bz2(bits) ((bits & FRT_FI_COMPRESSION_BZ2_BM) != 0)
|
97
|
+
#define bits_is_compressed_lz4(bits) ((bits & FRT_FI_COMPRESSION_LZ4_BM) != 0)
|
98
|
+
#define bits_is_indexed(bits) ((bits & FRT_FI_IS_INDEXED_BM) != 0)
|
99
|
+
#define bits_is_tokenized(bits) ((bits & FRT_FI_IS_TOKENIZED_BM) != 0)
|
100
|
+
#define bits_omit_norms(bits) ((bits & FRT_FI_OMIT_NORMS_BM) != 0)
|
101
|
+
#define bits_store_term_vector(bits) ((bits & FRT_FI_STORE_TERM_VECTOR_BM) != 0)
|
102
|
+
#define bits_store_positions(bits) ((bits & FRT_FI_STORE_POSITIONS_BM) != 0)
|
103
|
+
#define bits_store_offsets(bits) ((bits & FRT_FI_STORE_OFFSETS_BM) != 0)
|
104
|
+
#define bits_has_norms(bits)\
|
105
|
+
((bits & (FRT_FI_OMIT_NORMS_BM|FRT_FI_IS_INDEXED_BM)) == FRT_FI_IS_INDEXED_BM)
|
126
106
|
|
127
107
|
/****************************************************************************
|
128
108
|
*
|
@@ -133,21 +113,18 @@ extern void frt_fi_deref(FrtFieldInfo *fi);
|
|
133
113
|
#define FIELD_INFOS_INIT_CAPA 4
|
134
114
|
/* carry changes over to dummy_fis in test/test_segments.c */
|
135
115
|
typedef struct FrtFieldInfos {
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
int capa;
|
142
|
-
FrtFieldInfo **fields;
|
143
|
-
FrtHash *field_dict;
|
116
|
+
unsigned int bits;
|
117
|
+
int size;
|
118
|
+
int capa;
|
119
|
+
FrtFieldInfo **fields;
|
120
|
+
FrtHash *field_dict;
|
144
121
|
_Atomic unsigned int ref_cnt;
|
145
|
-
VALUE
|
122
|
+
VALUE rfis;
|
146
123
|
} FrtFieldInfos;
|
147
124
|
|
148
125
|
FrtFieldInfos *frt_fis_alloc();
|
149
|
-
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis,
|
150
|
-
FrtFieldInfos *frt_fis_new(
|
126
|
+
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, unsigned int bits);
|
127
|
+
FrtFieldInfos *frt_fis_new(unsigned int bits);
|
151
128
|
extern FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi);
|
152
129
|
extern FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name);
|
153
130
|
extern int frt_fis_get_field_num(FrtFieldInfos *fis, ID name);
|
@@ -151,8 +151,8 @@ static FrtMatchVector *spanq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, Fr
|
|
151
151
|
FrtMatchVector *full_mv = frt_matchv_new();
|
152
152
|
FrtHashSet *terms = SpQ(self)->get_terms(self);
|
153
153
|
/* FIXME What is going on here? Need to document this! */
|
154
|
-
ir->fis = frt_fis_new(
|
155
|
-
frt_fis_add_field(ir->fis, frt_fi_new(tv->field,
|
154
|
+
ir->fis = frt_fis_new(0);
|
155
|
+
frt_fis_add_field(ir->fis, frt_fi_new(tv->field, 0));
|
156
156
|
ir->store = (FrtStore *)tv;
|
157
157
|
FRT_REF((FrtStore *)tv);
|
158
158
|
ir->term_positions = &spanq_ir_term_positions;
|
@@ -6,7 +6,7 @@
|
|
6
6
|
extern rb_encoding *utf8_encoding;
|
7
7
|
|
8
8
|
static FrtFieldInfos *create_fis(void) {
|
9
|
-
FrtFieldInfos *fis = frt_fis_new(
|
9
|
+
FrtFieldInfos *fis = frt_fis_new(0 | FRT_FI_IS_STORED_BM | FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM | FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM | FRT_FI_STORE_OFFSETS_BM);
|
10
10
|
return fis;
|
11
11
|
}
|
12
12
|
|