isomorfeus-ferret 0.14.3 → 0.14.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,7 +44,7 @@ const FrtConfig frt_default_config = {
44
44
  static void ste_reset(FrtTermEnum *te);
45
45
  static char *ste_next(FrtTermEnum *te);
46
46
 
47
- #define FORMAT 0
47
+ #define FORMAT 15
48
48
  #define SEGMENTS_GEN_FILE_NAME "segments"
49
49
  #define MAX_EXT_LEN 10
50
50
  #define FRT_COMPRESSION_BUFFER_SIZE 16348
@@ -213,78 +213,11 @@ FrtHash *frt_co_hash_create(void) {
213
213
  *
214
214
  ****************************************************************************/
215
215
 
216
- static void fi_set_store(FrtFieldInfo *fi, FrtStoreValue store) {
217
- switch (store) {
218
- case FRT_STORE_NO:
219
- break;
220
- case FRT_STORE_YES:
221
- fi->bits |= FRT_FI_IS_STORED_BM;
222
- break;
223
- }
224
- }
225
-
226
- static void fi_set_compression(FrtFieldInfo *fi, FrtCompressionType compression) {
227
- switch (compression) {
228
- case FRT_COMPRESSION_NONE:
229
- break;
230
- case FRT_COMPRESSION_BROTLI:
231
- fi->bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_BROTLI_BM;
232
- break;
233
- case FRT_COMPRESSION_BZ2:
234
- fi->bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_BZ2_BM;
235
- break;
236
- case FRT_COMPRESSION_LZ4:
237
- fi->bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_LZ4_BM;
238
- break;
239
- }
240
- }
241
-
242
- static void fi_set_index(FrtFieldInfo *fi, FrtIndexValue index) {
243
- switch (index) {
244
- case FRT_INDEX_NO:
245
- break;
246
- case FRT_INDEX_YES:
247
- fi->bits |= FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM;
248
- break;
249
- case FRT_INDEX_UNTOKENIZED:
250
- fi->bits |= FRT_FI_IS_INDEXED_BM;
251
- break;
252
- case FRT_INDEX_YES_OMIT_NORMS:
253
- fi->bits |= FRT_FI_OMIT_NORMS_BM | FRT_FI_IS_INDEXED_BM |
254
- FRT_FI_IS_TOKENIZED_BM;
255
- break;
256
- case FRT_INDEX_UNTOKENIZED_OMIT_NORMS:
257
- fi->bits |= FRT_FI_OMIT_NORMS_BM | FRT_FI_IS_INDEXED_BM;
258
- break;
259
- }
260
- }
261
-
262
- static void fi_set_term_vector(FrtFieldInfo *fi, FrtTermVectorValue term_vector) {
263
- switch (term_vector) {
264
- case FRT_TERM_VECTOR_NO:
265
- break;
266
- case FRT_TERM_VECTOR_YES:
267
- fi->bits |= FRT_FI_STORE_TERM_VECTOR_BM;
268
- break;
269
- case FRT_TERM_VECTOR_WITH_POSITIONS:
270
- fi->bits |= FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM;
271
- break;
272
- case FRT_TERM_VECTOR_WITH_OFFSETS:
273
- fi->bits |= FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_OFFSETS_BM;
274
- break;
275
- case FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS:
276
- fi->bits |= FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM |
277
- FRT_FI_STORE_OFFSETS_BM;
278
- break;
279
- }
280
- }
281
-
282
- static void fi_check_params(FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector) {
283
- (void)store;
284
- if ((index == FRT_INDEX_NO) && (term_vector != FRT_TERM_VECTOR_NO)) {
216
+ static void fi_check_params(unsigned int bits) {
217
+ if (!bits_is_indexed(bits) && bits_store_term_vector(bits)) {
285
218
  FRT_RAISE(FRT_ARG_ERROR, "You can't store the term vectors of an unindexed field.");
286
219
  }
287
- if ((compression != FRT_COMPRESSION_NONE) && (store == FRT_STORE_NO)) {
220
+ if (bits_is_compressed(bits) && !bits_is_stored(bits)) {
288
221
  FRT_RAISE(FRT_ARG_ERROR, "Field must be stored for compression to be useful.");
289
222
  }
290
223
  }
@@ -293,25 +226,21 @@ FrtFieldInfo *frt_fi_alloc(void) {
293
226
  return FRT_ALLOC(FrtFieldInfo);
294
227
  }
295
228
 
296
- FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector) {
229
+ FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, unsigned int bits) {
297
230
  assert(NULL != name);
298
- fi_check_params(store, compression, index, term_vector);
231
+ fi_check_params(bits);
299
232
  fi->name = name;
300
233
  fi->boost = 1.0f;
301
- fi->bits = 0;
302
- fi_set_store(fi, store);
303
- fi_set_compression(fi, compression);
304
- fi_set_index(fi, index);
305
- fi_set_term_vector(fi, term_vector);
234
+ fi->bits = bits;
306
235
  fi->number = 0;
307
236
  fi->ref_cnt = 1;
308
237
  fi->rfi = Qnil;
309
238
  return fi;
310
239
  }
311
240
 
312
- FrtFieldInfo *frt_fi_new(ID name, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector) {
241
+ FrtFieldInfo *frt_fi_new(ID name, unsigned int bits) {
313
242
  FrtFieldInfo *fi = frt_fi_alloc();
314
- return frt_fi_init(fi, name, store, compression, index, term_vector);
243
+ return frt_fi_init(fi, name, bits);
315
244
  }
316
245
 
317
246
  void frt_fi_deref(FrtFieldInfo *fi) {
@@ -319,12 +248,12 @@ void frt_fi_deref(FrtFieldInfo *fi) {
319
248
  }
320
249
 
321
250
  FrtCompressionType frt_fi_get_compression(FrtFieldInfo *fi) {
322
- if (fi_is_compressed(fi)) {
323
- if (fi_is_compressed_brotli(fi)) {
251
+ if (bits_is_compressed(fi->bits)) {
252
+ if (bits_is_compressed_brotli(fi->bits)) {
324
253
  return FRT_COMPRESSION_BROTLI;
325
- } else if (fi_is_compressed_bz2(fi)) {
254
+ } else if (bits_is_compressed_bz2(fi->bits)) {
326
255
  return FRT_COMPRESSION_BZ2;
327
- } else if (fi_is_compressed_lz4(fi)) {
256
+ } else if (bits_is_compressed_lz4(fi->bits)) {
328
257
  return FRT_COMPRESSION_LZ4;
329
258
  } else {
330
259
  return FRT_COMPRESSION_BROTLI;
@@ -340,14 +269,14 @@ char *frt_fi_to_s(FrtFieldInfo *fi)
340
269
  char *str = FRT_ALLOC_N(char, strlen(fi_name) + 200);
341
270
  char *s = str;
342
271
  s += sprintf(str, "[\"%s\":(%s%s%s%s%s%s%s%s", fi_name,
343
- fi_is_stored(fi) ? "is_stored, " : "",
344
- fi_is_compressed(fi) ? "is_compressed, " : "",
345
- fi_is_indexed(fi) ? "is_indexed, " : "",
346
- fi_is_tokenized(fi) ? "is_tokenized, " : "",
347
- fi_omit_norms(fi) ? "omit_norms, " : "",
348
- fi_store_term_vector(fi) ? "store_term_vector, " : "",
349
- fi_store_positions(fi) ? "store_positions, " : "",
350
- fi_store_offsets(fi) ? "store_offsets, " : "");
272
+ bits_is_stored(fi->bits) ? "is_stored, " : "",
273
+ bits_is_compressed(fi->bits) ? "is_compressed, " : "",
274
+ bits_is_indexed(fi->bits) ? "is_indexed, " : "",
275
+ bits_is_tokenized(fi->bits) ? "is_tokenized, " : "",
276
+ bits_omit_norms(fi->bits) ? "omit_norms, " : "",
277
+ bits_store_term_vector(fi->bits) ? "store_term_vector, " : "",
278
+ bits_store_positions(fi->bits) ? "store_positions, " : "",
279
+ bits_store_offsets(fi->bits) ? "store_offsets, " : "");
351
280
  s -= 2;
352
281
  if (*s != ',') {
353
282
  s += 2;
@@ -367,24 +296,21 @@ FrtFieldInfos *frt_fis_alloc(void) {
367
296
  return FRT_ALLOC(FrtFieldInfos);
368
297
  }
369
298
 
370
- FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, FrtStoreValue store_val, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector) {
371
- fi_check_params(store_val, compression, index, term_vector);
299
+ FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, unsigned int bits) {
300
+ fi_check_params(bits);
372
301
  fis->field_dict = frt_h_new_ptr((frt_free_ft)&frt_fi_deref);
373
302
  fis->size = 0;
374
303
  fis->capa = FIELD_INFOS_INIT_CAPA;
375
304
  fis->fields = FRT_ALLOC_N(FrtFieldInfo *, fis->capa);
376
- fis->store_val = store_val;
377
- fis->compression = compression;
378
- fis->index = index;
379
- fis->term_vector = term_vector;
305
+ fis->bits = bits;
380
306
  fis->ref_cnt = 1;
381
307
  fis->rfis = Qnil;
382
308
  return fis;
383
309
  }
384
310
 
385
- FrtFieldInfos *frt_fis_new(FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector) {
311
+ FrtFieldInfos *frt_fis_new(unsigned int bits) {
386
312
  FrtFieldInfos *fis = frt_fis_alloc();
387
- return frt_fis_init(fis, store, compression, index, term_vector);
313
+ return frt_fis_init(fis, bits);
388
314
  }
389
315
 
390
316
  FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi) {
@@ -415,7 +341,7 @@ int frt_fis_get_field_num(FrtFieldInfos *fis, ID name) {
415
341
  FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name) {
416
342
  FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
417
343
  if (!fi) {
418
- fi = (FrtFieldInfo*)frt_fi_new(name, fis->store_val, fis->compression, fis->index, fis->term_vector);
344
+ fi = (FrtFieldInfo*)frt_fi_new(name, fis->bits);
419
345
  frt_fis_add_field(fis, fi);
420
346
  }
421
347
  return fi;
@@ -427,14 +353,10 @@ FrtFieldInfos *frt_fis_read(FrtInStream *is)
427
353
  char *field_name;
428
354
  FRT_TRY
429
355
  do {
430
- FrtTermVectorValue term_vector_val;
431
356
  volatile int i;
432
357
  union { frt_u32 i; float f; } tmp;
433
358
  FrtFieldInfo *volatile fi;
434
- FrtStoreValue store_val = (FrtStoreValue)frt_is_read_vint(is);
435
- FrtIndexValue index_val = (FrtIndexValue)frt_is_read_vint(is);
436
- term_vector_val = (FrtTermVectorValue)frt_is_read_vint(is);
437
- fis = frt_fis_new(store_val, FRT_COMPRESSION_NONE, index_val, term_vector_val); // TODO compression, read from store?
359
+ fis = frt_fis_new(frt_is_read_vint(is));
438
360
  for (i = frt_is_read_vint(is); i > 0; i--) {
439
361
  fi = FRT_ALLOC_AND_ZERO(FrtFieldInfo);
440
362
  FRT_TRY
@@ -464,9 +386,7 @@ void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os)
464
386
  FrtFieldInfo *fi;
465
387
  const int fis_size = fis->size;
466
388
 
467
- frt_os_write_vint(os, fis->store_val);
468
- frt_os_write_vint(os, fis->index);
469
- frt_os_write_vint(os, fis->term_vector);
389
+ frt_os_write_vint(os, fis->bits);
470
390
  frt_os_write_vint(os, fis->size);
471
391
 
472
392
  for (i = 0; i < fis_size; i++) {
@@ -536,9 +456,9 @@ char *frt_fis_to_s(FrtFieldInfos *fis)
536
456
  " index: %s\n"
537
457
  " term_vector: %s\n"
538
458
  "fields:\n",
539
- store_str[fis->store_val],
540
- index_str[fis->index],
541
- term_vector_str[fis->term_vector]);
459
+ store_str[fis->bits & 0x3],
460
+ index_str[(fis->bits >> 2) & 0x7],
461
+ term_vector_str[(fis->bits >> 5) & 0x7]);
542
462
  for (i = 0; i < fis_size; i++) {
543
463
  fi = fis->fields[i];
544
464
  pos += sprintf(buf + pos,
@@ -568,7 +488,7 @@ static bool fis_has_vectors(FrtFieldInfos *fis)
568
488
  const int fis_size = fis->size;
569
489
 
570
490
  for (i = 0; i < fis_size; i++) {
571
- if (fi_store_term_vector(fis->fields[i])) {
491
+ if (bits_store_term_vector(fis->fields[i]->bits)) {
572
492
  return true;
573
493
  }
574
494
  }
@@ -1047,6 +967,7 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReade
1047
967
  {
1048
968
  int seg_cnt;
1049
969
  int i;
970
+ frt_u32 format = 0;
1050
971
  volatile bool success = false;
1051
972
  char seg_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
1052
973
  FrtInStream *volatile is = NULL;
@@ -1058,7 +979,9 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReade
1058
979
  sis->store = store;
1059
980
  FRT_REF(store);
1060
981
  sis->generation = fsf->generation;
1061
- sis->format = frt_is_read_u32(is); /* do nothing. it's the first version */
982
+ format = frt_is_read_u32(is);
983
+ if (format == FORMAT) sis->format = format;
984
+ else FRT_RAISE(FRT_EXCEPTION, "Wrong index format, required format '%u', format of given index '%u'", FORMAT, format);
1062
985
  sis->version = frt_is_read_u64(is);
1063
986
  sis->counter = frt_is_read_u64(is);
1064
987
  seg_cnt = frt_is_read_vint(is);
@@ -1073,9 +996,7 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReade
1073
996
  success = true;
1074
997
  FRT_XFINALLY
1075
998
  if (is) frt_is_close(is);
1076
- if (!success) {
1077
- frt_sis_destroy(sis);
1078
- }
999
+ if (!success) frt_sis_destroy(sis);
1079
1000
  FRT_XENDTRY
1080
1001
  fsf->ret.sis = sis;
1081
1002
  }
@@ -1126,16 +1047,17 @@ void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter)
1126
1047
  static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_)
1127
1048
  {
1128
1049
  FrtInStream *is;
1129
- frt_u64 version;
1050
+ frt_u32 format = 0;
1051
+ frt_u64 version = 0;
1130
1052
  char seg_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
1131
1053
 
1132
1054
  segfn_for_generation(seg_file_name, (frt_u64)fsf->generation);
1133
1055
  is = store->open_input(store, seg_file_name);
1134
- version = 0;
1135
1056
 
1136
1057
  FRT_TRY
1137
- frt_is_read_u32(is); // format
1138
- version = frt_is_read_u64(is);
1058
+ format = frt_is_read_u32(is); // format
1059
+ if (format == FORMAT) version = frt_is_read_u64(is);
1060
+ else FRT_RAISE(FRT_EXCEPTION, "Wrong index format, required format '%u', format of given index '%u'", FORMAT, format);
1139
1061
  FRT_XFINALLY
1140
1062
  frt_is_close(is);
1141
1063
  FRT_XENDTRY
@@ -1671,8 +1593,8 @@ static FrtTermVector *frt_fr_read_term_vector(FrtFieldsReader *fr, int field_num
1671
1593
 
1672
1594
  if (num_terms > 0) {
1673
1595
  int i, j, delta_start, delta_len, total_len, freq;
1674
- int store_positions = fi_store_positions(fi);
1675
- int store_offsets = fi_store_offsets(fi);
1596
+ int store_positions = bits_store_positions(fi->bits);
1597
+ int store_offsets = bits_store_offsets(fi->bits);
1676
1598
  frt_uchar buffer[FRT_MAX_WORD_SIZE];
1677
1599
  FrtTVTerm *term;
1678
1600
 
@@ -1975,7 +1897,7 @@ void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc) {
1975
1897
 
1976
1898
  for (i = 0; i < doc_size; i++) {
1977
1899
  df = doc->fields[i];
1978
- if (fi_is_stored(frt_fis_get_or_add_field(fw->fis, df->name))) {
1900
+ if (bits_is_stored(frt_fis_get_or_add_field(fw->fis, df->name)->bits)) {
1979
1901
  stored_cnt++;
1980
1902
  }
1981
1903
  }
@@ -1989,12 +1911,12 @@ void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc) {
1989
1911
  for (i = 0; i < doc_size; i++) {
1990
1912
  df = doc->fields[i];
1991
1913
  fi = frt_fis_get_field(fw->fis, df->name);
1992
- if (fi_is_stored(fi)) {
1914
+ if (bits_is_stored(fi->bits)) {
1993
1915
  const int df_size = df->size;
1994
1916
  frt_os_write_vint(fdt_out, fi->number);
1995
1917
  frt_os_write_vint(fdt_out, df_size);
1996
1918
 
1997
- if (fi_is_compressed(fi)) {
1919
+ if (bits_is_compressed(fi->bits)) {
1998
1920
  compression = frt_fi_get_compression(fi);
1999
1921
  for (j = 0; j < df_size; j++) {
2000
1922
  const int length = df->lengths[j];
@@ -2048,7 +1970,7 @@ void frt_fw_add_postings(FrtFieldsWriter *fw,
2048
1970
  FrtPosting *posting;
2049
1971
  FrtOccurence *occ;
2050
1972
  FrtFieldInfo *fi = fw->fis->fields[field_num];
2051
- int store_positions = fi_store_positions(fi);
1973
+ int store_positions = bits_store_positions(fi->bits);
2052
1974
 
2053
1975
  frt_ary_grow(fw->tv_fields);
2054
1976
  frt_ary_last(fw->tv_fields).field_num = field_num;
@@ -2080,7 +2002,7 @@ void frt_fw_add_postings(FrtFieldsWriter *fw,
2080
2002
 
2081
2003
  }
2082
2004
 
2083
- if (fi_store_offsets(fi)) {
2005
+ if (bits_store_offsets(fi->bits)) {
2084
2006
  /* use delta encoding for offsets */
2085
2007
  frt_i64 last_end = 0;
2086
2008
  frt_os_write_vint(fdt_out, offset_count); /* write shared prefix length */
@@ -4042,8 +3964,8 @@ void frt_ir_commit(FrtIndexReader *ir)
4042
3964
 
4043
3965
  void frt_ir_close(FrtIndexReader *ir) {
4044
3966
  if (ir->ref_cnt == 0) {
4045
- fprintf(stderr, "ir ref_cnt to low\n");
4046
- FRT_RAISE(FRT_STATE_ERROR, "ir ref_cnt to low\n");
3967
+ fprintf(stderr, "Warning: IndexReader ref_cnt to low\n");
3968
+ return;
4047
3969
  }
4048
3970
 
4049
3971
  if (FRT_DEREF(ir) == 0) {
@@ -4285,7 +4207,7 @@ static void sr_commit_i(FrtIndexReader *ir)
4285
4207
  FrtFieldInfo *fi;
4286
4208
  for (i = field_cnt - 1; i >= 0; i--) {
4287
4209
  fi = ir->fis->fields[i];
4288
- if (fi_is_indexed(fi)) {
4210
+ if (bits_is_indexed(fi->bits)) {
4289
4211
  Norm *norm = (Norm *)frt_h_get_int(SR(ir)->norms, fi->number);
4290
4212
  if (norm && norm->is_dirty) {
4291
4213
  norm_rewrite(norm, ir->store, ir->deleter, SR(ir)->si,
@@ -4415,7 +4337,7 @@ static FrtTermVector *sr_term_vector(FrtIndexReader *ir, int doc_num, ID field)
4415
4337
  FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(ir->fis->field_dict, (void *)field);
4416
4338
  FrtFieldsReader *fr;
4417
4339
 
4418
- if (!fi || !fi_store_term_vector(fi) || !SR(ir)->fr || !(fr = sr_fr(SR(ir)))) {
4340
+ if (!fi || !bits_store_term_vector(fi->bits) || !SR(ir)->fr || !(fr = sr_fr(SR(ir)))) {
4419
4341
  return NULL;
4420
4342
  }
4421
4343
 
@@ -4903,7 +4825,7 @@ FrtIndexReader *frt_mr_open(FrtIndexReader *ir, FrtIndexReader **sub_readers, co
4903
4825
  ir = (FrtIndexReader *)frt_mr_init((FrtMultiReader *)ir, sub_readers, r_cnt);
4904
4826
  FrtMultiReader *mr = MR(ir);
4905
4827
  /* defaults don't matter, this is just for reading fields, not adding */
4906
- FrtFieldInfos *fis = frt_fis_new(FRT_STORE_NO, FRT_COMPRESSION_NONE, FRT_INDEX_NO, FRT_TERM_VECTOR_NO);
4828
+ FrtFieldInfos *fis = frt_fis_new(0);
4907
4829
  int i, j;
4908
4830
  bool need_field_map = false;
4909
4831
 
@@ -5082,10 +5004,10 @@ int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2)
5082
5004
  static FrtFieldInverter *fld_inv_new(FrtDocWriter *dw, FrtFieldInfo *fi)
5083
5005
  {
5084
5006
  FrtFieldInverter *fld_inv = FRT_MP_ALLOC(dw->mp, FrtFieldInverter);
5085
- fld_inv->is_tokenized = fi_is_tokenized(fi);
5086
- fld_inv->store_term_vector = fi_store_term_vector(fi);
5087
- fld_inv->store_offsets = fi_store_offsets(fi);
5088
- if ((fld_inv->has_norms = fi_has_norms(fi)) == true) {
5007
+ fld_inv->is_tokenized = bits_is_tokenized(fi->bits);
5008
+ fld_inv->store_term_vector = bits_store_term_vector(fi->bits);
5009
+ fld_inv->store_offsets = bits_store_offsets(fi->bits);
5010
+ if ((fld_inv->has_norms = bits_has_norms(fi->bits)) == true) {
5089
5011
  fld_inv->norms = FRT_MP_ALLOC_AND_ZERO_N(dw->mp, frt_uchar,
5090
5012
  dw->max_buffered_docs);
5091
5013
  }
@@ -5235,11 +5157,11 @@ static void dw_flush(FrtDocWriter *dw)
5235
5157
 
5236
5158
  for (i = 0; i < fields_count; i++) {
5237
5159
  fi = fis->fields[i];
5238
- if (!fi_is_indexed(fi) || NULL ==
5160
+ if (!bits_is_indexed(fi->bits) || NULL ==
5239
5161
  (fld_inv = (FrtFieldInverter*)frt_h_get_int(dw->fields, fi->number))) {
5240
5162
  continue;
5241
5163
  }
5242
- if (!fi_omit_norms(fi)) {
5164
+ if (!bits_omit_norms(fi->bits)) {
5243
5165
  dw_write_norms(dw, fld_inv);
5244
5166
  }
5245
5167
 
@@ -5499,7 +5421,7 @@ void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc) {
5499
5421
  for (i = 0; i < doc_size; i++) {
5500
5422
  df = doc->fields[i];
5501
5423
  fi = frt_fis_get_field(dw->fis, df->name);
5502
- if (!fi_is_indexed(fi)) {
5424
+ if (!bits_is_indexed(fi->bits)) {
5503
5425
  continue;
5504
5426
  }
5505
5427
  fld_inv = frt_dw_get_fld_inv(dw, fi);
@@ -5941,7 +5863,7 @@ static void sm_merge_norms(SegmentMerger *sm)
5941
5863
  const int seg_cnt = sm->seg_cnt;
5942
5864
  for (i = sm->fis->size - 1; i >= 0; i--) {
5943
5865
  fi = sm->fis->fields[i];
5944
- if (fi_has_norms(fi)) {
5866
+ if (bits_has_norms(fi->bits)) {
5945
5867
  si = sm->si;
5946
5868
  frt_si_advance_norm_gen(si, i);
5947
5869
  si_norm_file_name(si, file_name, i);
@@ -6042,7 +5964,7 @@ static void iw_create_compound_file(FrtStore *store, FrtFieldInfos *fis, FrtSegm
6042
5964
 
6043
5965
  /* Field norm file_names */
6044
5966
  for (i = fis->size - 1; i >= 0; i--) {
6045
- if (fi_has_norms(fis->fields[i]) && si_norm_file_name(si, file_name, i)) {
5967
+ if (bits_has_norms(fis->fields[i]->bits) && si_norm_file_name(si, file_name, i)) {
6046
5968
  frt_cw_add_file(cw, file_name);
6047
5969
  }
6048
5970
  }
@@ -6510,7 +6432,7 @@ static void iw_cp_norms(FrtIndexWriter *iw, FrtSegmentReader *sr,
6510
6432
  char file_name_out[FRT_SEGMENT_NAME_MAX_LENGTH];
6511
6433
 
6512
6434
  for (i = 0; i < field_cnt; i++) {
6513
- if (fi_has_norms(fis->fields[i])
6435
+ if (bits_has_norms(fis->fields[i]->bits)
6514
6436
  && si_norm_file_name(sr->si, file_name_in, i)) {
6515
6437
  FrtStore *store = (sr->si->use_compound_file
6516
6438
  && sr->si->norm_gens[i] == 0) ? sr->cfs_store
@@ -6567,8 +6489,7 @@ static void iw_add_segment(FrtIndexWriter *iw, FrtSegmentReader *sr)
6567
6489
  FrtFieldInfo *fi = sub_fis->fields[j];
6568
6490
  FrtFieldInfo *new_fi = frt_fis_get_field(fis, fi->name);
6569
6491
  if (NULL == new_fi) {
6570
- new_fi = frt_fi_new(fi->name, FRT_STORE_NO, FRT_COMPRESSION_NONE, FRT_INDEX_NO, FRT_TERM_VECTOR_NO);
6571
- new_fi->bits = fi->bits;
6492
+ new_fi = frt_fi_new(fi->name, fi->bits);
6572
6493
  frt_fis_add_field(fis, new_fi);
6573
6494
  }
6574
6495
  new_fi->bits |= fi->bits;
@@ -62,27 +62,7 @@ extern FrtHash *frt_co_hash_create();
62
62
  *
63
63
  ****************************************************************************/
64
64
 
65
- typedef enum {
66
- FRT_STORE_NO = 0,
67
- FRT_STORE_YES = 1,
68
- } FrtStoreValue;
69
-
70
- typedef enum {
71
- FRT_INDEX_NO = 0,
72
- FRT_INDEX_UNTOKENIZED = 1,
73
- FRT_INDEX_YES = 3,
74
- FRT_INDEX_UNTOKENIZED_OMIT_NORMS = 5,
75
- FRT_INDEX_YES_OMIT_NORMS = 7
76
- } FrtIndexValue;
77
-
78
- typedef enum {
79
- FRT_TERM_VECTOR_NO = 0,
80
- FRT_TERM_VECTOR_YES = 1,
81
- FRT_TERM_VECTOR_WITH_POSITIONS = 3,
82
- FRT_TERM_VECTOR_WITH_OFFSETS = 5,
83
- FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS = 7
84
- } FrtTermVectorValue;
85
-
65
+ #define FRT_FI_DEFAULTS_BM FRT_FI_IS_STORED_BM | FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM | FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM | FRT_FI_STORE_OFFSETS_BM
86
66
  #define FRT_FI_IS_STORED_BM 0x001
87
67
  #define FRT_FI_IS_COMPRESSED_BM 0x002
88
68
  #define FRT_FI_IS_INDEXED_BM 0x004
@@ -105,24 +85,24 @@ typedef struct FrtFieldInfo {
105
85
  } FrtFieldInfo;
106
86
 
107
87
  extern FrtFieldInfo *frt_fi_alloc();
108
- extern FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
109
- extern FrtFieldInfo *frt_fi_new(ID name, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
88
+ extern FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, unsigned int bits);
89
+ extern FrtFieldInfo *frt_fi_new(ID name, unsigned int bits);
110
90
  extern char *frt_fi_to_s(FrtFieldInfo *fi);
111
91
  extern void frt_fi_deref(FrtFieldInfo *fi);
112
92
 
113
- #define fi_is_stored(fi) (((fi)->bits & FRT_FI_IS_STORED_BM) != 0)
114
- #define fi_is_compressed(fi) (((fi)->bits & FRT_FI_IS_COMPRESSED_BM) != 0)
115
- #define fi_is_compressed_brotli(fi) (((fi)->bits & FRT_FI_COMPRESSION_BROTLI_BM) != 0)
116
- #define fi_is_compressed_bz2(fi) (((fi)->bits & FRT_FI_COMPRESSION_BZ2_BM) != 0)
117
- #define fi_is_compressed_lz4(fi) (((fi)->bits & FRT_FI_COMPRESSION_LZ4_BM) != 0)
118
- #define fi_is_indexed(fi) (((fi)->bits & FRT_FI_IS_INDEXED_BM) != 0)
119
- #define fi_is_tokenized(fi) (((fi)->bits & FRT_FI_IS_TOKENIZED_BM) != 0)
120
- #define fi_omit_norms(fi) (((fi)->bits & FRT_FI_OMIT_NORMS_BM) != 0)
121
- #define fi_store_term_vector(fi) (((fi)->bits & FRT_FI_STORE_TERM_VECTOR_BM) != 0)
122
- #define fi_store_positions(fi) (((fi)->bits & FRT_FI_STORE_POSITIONS_BM) != 0)
123
- #define fi_store_offsets(fi) (((fi)->bits & FRT_FI_STORE_OFFSETS_BM) != 0)
124
- #define fi_has_norms(fi)\
125
- (((fi)->bits & (FRT_FI_OMIT_NORMS_BM|FRT_FI_IS_INDEXED_BM)) == FRT_FI_IS_INDEXED_BM)
93
+ #define bits_is_stored(bits) ((bits & FRT_FI_IS_STORED_BM) != 0)
94
+ #define bits_is_compressed(bits) ((bits & FRT_FI_IS_COMPRESSED_BM) != 0)
95
+ #define bits_is_compressed_brotli(bits) ((bits & FRT_FI_COMPRESSION_BROTLI_BM) != 0)
96
+ #define bits_is_compressed_bz2(bits) ((bits & FRT_FI_COMPRESSION_BZ2_BM) != 0)
97
+ #define bits_is_compressed_lz4(bits) ((bits & FRT_FI_COMPRESSION_LZ4_BM) != 0)
98
+ #define bits_is_indexed(bits) ((bits & FRT_FI_IS_INDEXED_BM) != 0)
99
+ #define bits_is_tokenized(bits) ((bits & FRT_FI_IS_TOKENIZED_BM) != 0)
100
+ #define bits_omit_norms(bits) ((bits & FRT_FI_OMIT_NORMS_BM) != 0)
101
+ #define bits_store_term_vector(bits) ((bits & FRT_FI_STORE_TERM_VECTOR_BM) != 0)
102
+ #define bits_store_positions(bits) ((bits & FRT_FI_STORE_POSITIONS_BM) != 0)
103
+ #define bits_store_offsets(bits) ((bits & FRT_FI_STORE_OFFSETS_BM) != 0)
104
+ #define bits_has_norms(bits)\
105
+ ((bits & (FRT_FI_OMIT_NORMS_BM|FRT_FI_IS_INDEXED_BM)) == FRT_FI_IS_INDEXED_BM)
126
106
 
127
107
  /****************************************************************************
128
108
  *
@@ -133,21 +113,18 @@ extern void frt_fi_deref(FrtFieldInfo *fi);
133
113
  #define FIELD_INFOS_INIT_CAPA 4
134
114
  /* carry changes over to dummy_fis in test/test_segments.c */
135
115
  typedef struct FrtFieldInfos {
136
- FrtStoreValue store_val;
137
- FrtCompressionType compression;
138
- FrtIndexValue index;
139
- FrtTermVectorValue term_vector;
140
- int size;
141
- int capa;
142
- FrtFieldInfo **fields;
143
- FrtHash *field_dict;
116
+ unsigned int bits;
117
+ int size;
118
+ int capa;
119
+ FrtFieldInfo **fields;
120
+ FrtHash *field_dict;
144
121
  _Atomic unsigned int ref_cnt;
145
- VALUE rfis;
122
+ VALUE rfis;
146
123
  } FrtFieldInfos;
147
124
 
148
125
  FrtFieldInfos *frt_fis_alloc();
149
- FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
150
- FrtFieldInfos *frt_fis_new(FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
126
+ FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, unsigned int bits);
127
+ FrtFieldInfos *frt_fis_new(unsigned int bits);
151
128
  extern FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi);
152
129
  extern FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name);
153
130
  extern int frt_fis_get_field_num(FrtFieldInfos *fis, ID name);
@@ -151,8 +151,8 @@ static FrtMatchVector *spanq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, Fr
151
151
  FrtMatchVector *full_mv = frt_matchv_new();
152
152
  FrtHashSet *terms = SpQ(self)->get_terms(self);
153
153
  /* FIXME What is going on here? Need to document this! */
154
- ir->fis = frt_fis_new(FRT_STORE_NO, FRT_COMPRESSION_NONE, FRT_INDEX_NO, FRT_TERM_VECTOR_NO);
155
- frt_fis_add_field(ir->fis, frt_fi_new(tv->field, FRT_STORE_NO, FRT_COMPRESSION_NONE, FRT_INDEX_NO, FRT_TERM_VECTOR_NO));
154
+ ir->fis = frt_fis_new(0);
155
+ frt_fis_add_field(ir->fis, frt_fi_new(tv->field, 0));
156
156
  ir->store = (FrtStore *)tv;
157
157
  FRT_REF((FrtStore *)tv);
158
158
  ir->term_positions = &spanq_ir_term_positions;
@@ -6,7 +6,7 @@
6
6
  extern rb_encoding *utf8_encoding;
7
7
 
8
8
  static FrtFieldInfos *create_fis(void) {
9
- FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_COMPRESSION_NONE, FRT_INDEX_YES, FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
9
+ FrtFieldInfos *fis = frt_fis_new(0 | FRT_FI_IS_STORED_BM | FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM | FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM | FRT_FI_STORE_OFFSETS_BM);
10
10
  return fis;
11
11
  }
12
12