isomorfeus-ferret 0.12.7 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (164)
  1. checksums.yaml +4 -4
  2. data/LICENSE +101 -19
  3. data/README.md +54 -1
  4. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
  5. data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
  6. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
  7. data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
  8. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +1 -1
  9. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +1 -1
  10. data/ext/isomorfeus_ferret_ext/bzip_blocksort.c +1094 -0
  11. data/ext/isomorfeus_ferret_ext/bzip_huffman.c +205 -0
  12. data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
  13. data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
  14. data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
  15. data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
  16. data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
  17. data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
  18. data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
  19. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
  20. data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
  21. data/ext/isomorfeus_ferret_ext/frb_index.c +492 -474
  22. data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
  23. data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
  24. data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
  25. data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
  26. data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
  27. data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
  28. data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
  29. data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
  30. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
  31. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
  32. data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
  33. data/ext/isomorfeus_ferret_ext/frt_document.h +10 -10
  34. data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +105 -63
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +7 -3
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
  42. data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
  43. data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
  44. data/ext/isomorfeus_ferret_ext/frt_index.c +580 -399
  45. data/ext/isomorfeus_ferret_ext/frt_index.h +272 -291
  46. data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
  47. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
  48. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +67 -91
  49. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
  50. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
  51. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
  52. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
  53. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
  54. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
  55. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
  56. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
  57. data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
  58. data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
  59. data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
  60. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
  61. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +45 -84
  62. data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
  63. data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
  64. data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
  65. data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
  66. data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
  67. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
  68. data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
  69. data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
  70. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +21 -109
  71. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
  72. data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
  73. data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
  74. data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
  75. data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
  76. data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
  77. data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
  78. data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
  79. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
  80. data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
  81. data/ext/isomorfeus_ferret_ext/test.c +1 -2
  82. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
  83. data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
  84. data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
  85. data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
  86. data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
  87. data/ext/isomorfeus_ferret_ext/test_fields.c +59 -60
  88. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
  89. data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
  90. data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
  91. data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
  92. data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
  93. data/ext/isomorfeus_ferret_ext/test_index.c +372 -365
  94. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
  95. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
  96. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
  97. data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
  98. data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
  99. data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
  100. data/ext/isomorfeus_ferret_ext/test_search.c +60 -62
  101. data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
  102. data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
  103. data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
  104. data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
  105. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
  106. data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
  107. data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
  108. data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
  109. data/lib/isomorfeus/ferret/version.rb +1 -1
  110. metadata +27 -57
  111. data/ext/isomorfeus_ferret_ext/email.rl +0 -21
  112. data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
  113. data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
  114. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
  115. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
  116. data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
  117. data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
  118. data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
  119. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
  120. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
  128. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
  129. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
  130. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
  131. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
  132. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
  133. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
  134. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
  135. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
  136. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
  137. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
  138. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
  139. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
  140. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
  141. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
  142. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
  143. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
  144. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
  145. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
  146. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
  147. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
  148. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
  149. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
  150. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
  151. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
  152. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
  153. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
  154. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
  155. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
  156. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
  157. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
  158. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
  159. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
  160. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
  161. data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
  162. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
  163. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
  164. data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -8,6 +8,11 @@
8
8
  #include <ctype.h>
9
9
  #include "brotli_decode.h"
10
10
  #include "brotli_encode.h"
11
+ #include "bzlib.h"
12
+ #include "lz4frame.h"
13
+
14
+ #undef close
15
+ #undef read
11
16
 
12
17
  extern void frt_micro_sleep(const int micro_seconds);
13
18
 
@@ -41,8 +46,9 @@ static char *ste_next(FrtTermEnum *te);
41
46
  #define FORMAT 0
42
47
  #define SEGMENTS_GEN_FILE_NAME "segments"
43
48
  #define MAX_EXT_LEN 10
44
- #define COMPRESSION_BUFFER_SIZE 16348
45
- #define COMPRESSION_LEVEL 9
49
+ #define FRT_COMPRESSION_BUFFER_SIZE 16348
50
+ #define FRT_BROTLI_COMPRESSION_LEVEL 4
51
+ #define FRT_BZIP_COMPRESSION_LEVEL 9
46
52
 
47
53
  /* *** Must be three characters *** */
48
54
  static const char *INDEX_EXTENSIONS[] = {
@@ -103,29 +109,22 @@ static frt_u64 str36_to_u64(char *p)
103
109
  * @param ext extension of the filename (including .)
104
110
  * @param gen generation
105
111
  */
106
- char *frt_fn_for_generation(char *buf,
107
- const char *base,
108
- const char *ext,
109
- frt_i64 gen)
110
- {
112
+ char *frt_fn_for_generation(char *buf, const char *base, const char *ext, frt_i64 gen) {
111
113
  if (-1 == gen) {
112
114
  return NULL;
113
- }
114
- else {
115
+ } else {
115
116
  char b[FRT_SEGMENT_NAME_MAX_LENGTH];
116
117
  char *u = u64_to_str36(b, FRT_SEGMENT_NAME_MAX_LENGTH, (frt_u64)gen);
117
118
  if (ext == NULL) {
118
119
  sprintf(buf, "%s_%s", base, u);
119
- }
120
- else {
120
+ } else {
121
121
  sprintf(buf, "%s_%s.%s", base, u, ext);
122
122
  }
123
123
  return buf;
124
124
  }
125
125
  }
126
126
 
127
- static char *segfn_for_generation(char *buf, frt_u64 generation)
128
- {
127
+ static char *segfn_for_generation(char *buf, frt_u64 generation) {
129
128
  char b[FRT_SEGMENT_NAME_MAX_LENGTH];
130
129
  char *u = u64_to_str36(b, FRT_SEGMENT_NAME_MAX_LENGTH, generation);
131
130
  sprintf(buf, FRT_SEGMENTS_FILE_NAME"_%s", u);
@@ -203,8 +202,7 @@ FrtCacheObject *frt_co_create(FrtHash *ref_tab1, FrtHash *ref_tab2,
203
202
  return self;
204
203
  }
205
204
 
206
- FrtHash *frt_co_hash_create()
207
- {
205
+ FrtHash *frt_co_hash_create(void) {
208
206
  return frt_h_new(&co_hash, &co_eq, (frt_free_ft)NULL, (frt_free_ft)&co_destroy);
209
207
  }
210
208
 
@@ -214,22 +212,33 @@ FrtHash *frt_co_hash_create()
214
212
  *
215
213
  ****************************************************************************/
216
214
 
217
- static void fi_set_store(FrtFieldInfo *fi, int store)
218
- {
215
+ static void fi_set_store(FrtFieldInfo *fi, FrtStoreValue store) {
219
216
  switch (store) {
220
217
  case FRT_STORE_NO:
221
218
  break;
222
219
  case FRT_STORE_YES:
223
220
  fi->bits |= FRT_FI_IS_STORED_BM;
224
221
  break;
225
- case FRT_STORE_COMPRESS:
226
- fi->bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_IS_STORED_BM;
222
+ }
223
+ }
224
+
225
+ static void fi_set_compression(FrtFieldInfo *fi, FrtCompressionType compression) {
226
+ switch (compression) {
227
+ case FRT_COMPRESSION_NONE:
228
+ break;
229
+ case FRT_COMPRESSION_BROTLI:
230
+ fi->bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_BROTLI_BM;
231
+ break;
232
+ case FRT_COMPRESSION_BZ2:
233
+ fi->bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_BZ2_BM;
234
+ break;
235
+ case FRT_COMPRESSION_LZ4:
236
+ fi->bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_LZ4_BM;
227
237
  break;
228
238
  }
229
239
  }
230
240
 
231
- static void fi_set_index(FrtFieldInfo *fi, int index)
232
- {
241
+ static void fi_set_index(FrtFieldInfo *fi, FrtIndexValue index) {
233
242
  switch (index) {
234
243
  case FRT_INDEX_NO:
235
244
  break;
@@ -249,8 +258,7 @@ static void fi_set_index(FrtFieldInfo *fi, int index)
249
258
  }
250
259
  }
251
260
 
252
- static void fi_set_term_vector(FrtFieldInfo *fi, int term_vector)
253
- {
261
+ static void fi_set_term_vector(FrtFieldInfo *fi, FrtTermVectorValue term_vector) {
254
262
  switch (term_vector) {
255
263
  case FRT_TERM_VECTOR_NO:
256
264
  break;
@@ -270,33 +278,40 @@ static void fi_set_term_vector(FrtFieldInfo *fi, int term_vector)
270
278
  }
271
279
  }
272
280
 
273
- static void fi_check_params(int store, int index, int term_vector)
274
- {
281
+ static void fi_check_params(FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector) {
275
282
  (void)store;
276
283
  if ((index == FRT_INDEX_NO) && (term_vector != FRT_TERM_VECTOR_NO)) {
277
- FRT_RAISE(FRT_ARG_ERROR,
278
- "You can't store the term vectors of an unindexed field");
284
+ FRT_RAISE(FRT_ARG_ERROR, "You can't store the term vectors of an unindexed field.");
285
+ }
286
+ if ((compression != FRT_COMPRESSION_NONE) && (store == FRT_STORE_NO)) {
287
+ FRT_RAISE(FRT_ARG_ERROR, "Field must be stored for compression to be useful.");
279
288
  }
280
289
  }
281
290
 
282
- FrtFieldInfo *frt_fi_new(FrtSymbol name,
283
- FrtStoreValue store,
284
- FrtIndexValue index,
285
- FrtTermVectorValue term_vector)
286
- {
287
- FrtFieldInfo *fi = FRT_ALLOC(FrtFieldInfo);
291
+ FrtFieldInfo *frt_fi_alloc(void) {
292
+ return FRT_ALLOC(FrtFieldInfo);
293
+ }
294
+
295
+ FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector) {
288
296
  assert(NULL != name);
289
- fi_check_params(store, index, term_vector);
297
+ fi_check_params(store, compression, index, term_vector);
290
298
  fi->name = name;
291
299
  fi->boost = 1.0f;
292
300
  fi->bits = 0;
293
301
  fi_set_store(fi, store);
302
+ fi_set_compression(fi, compression);
294
303
  fi_set_index(fi, index);
295
304
  fi_set_term_vector(fi, term_vector);
296
305
  fi->ref_cnt = 1;
306
+ fi->rfi = Qnil;
297
307
  return fi;
298
308
  }
299
309
 
310
+ FrtFieldInfo *frt_fi_new(ID name, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector) {
311
+ FrtFieldInfo *fi = frt_fi_alloc();
312
+ return frt_fi_init(fi, name, store, compression, index, term_vector);
313
+ }
314
+
300
315
  void frt_fi_deref(FrtFieldInfo *fi)
301
316
  {
302
317
  if (0 == --(fi->ref_cnt)) {
@@ -304,6 +319,22 @@ void frt_fi_deref(FrtFieldInfo *fi)
304
319
  }
305
320
  }
306
321
 
322
+ FrtCompressionType frt_fi_get_compression(FrtFieldInfo *fi) {
323
+ if (fi_is_compressed(fi)) {
324
+ if (fi_is_compressed_brotli(fi)) {
325
+ return FRT_COMPRESSION_BROTLI;
326
+ } else if (fi_is_compressed_bz2(fi)) {
327
+ return FRT_COMPRESSION_BZ2;
328
+ } else if (fi_is_compressed_lz4(fi)) {
329
+ return FRT_COMPRESSION_LZ4;
330
+ } else {
331
+ return FRT_COMPRESSION_BROTLI;
332
+ }
333
+ } else {
334
+ return FRT_COMPRESSION_NONE;
335
+ }
336
+ }
337
+
307
338
  char *frt_fi_to_s(FrtFieldInfo *fi)
308
339
  {
309
340
  const char *fi_name = rb_id2name(fi->name);
@@ -333,24 +364,31 @@ char *frt_fi_to_s(FrtFieldInfo *fi)
333
364
  *
334
365
  ****************************************************************************/
335
366
 
336
- FrtFieldInfos *frt_fis_new(FrtStoreValue store, FrtIndexValue index,
337
- FrtTermVectorValue term_vector)
338
- {
339
- FrtFieldInfos *fis = FRT_ALLOC(FrtFieldInfos);
340
- fi_check_params(store, index, term_vector);
367
+ FrtFieldInfos *frt_fis_alloc(void) {
368
+ return FRT_ALLOC(FrtFieldInfos);
369
+ }
370
+
371
+ FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector) {
372
+ fi_check_params(store, compression, index, term_vector);
341
373
  fis->field_dict = frt_h_new_ptr((frt_free_ft)&frt_fi_deref);
342
374
  fis->size = 0;
343
375
  fis->capa = FIELD_INFOS_INIT_CAPA;
344
376
  fis->fields = FRT_ALLOC_N(FrtFieldInfo *, fis->capa);
345
377
  fis->store = store;
378
+ fis->compression = compression;
346
379
  fis->index = index;
347
380
  fis->term_vector = term_vector;
348
381
  fis->ref_cnt = 1;
382
+ fis->rfis = Qnil;
349
383
  return fis;
350
384
  }
351
385
 
352
- FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi)
353
- {
386
+ FrtFieldInfos *frt_fis_new(FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector) {
387
+ FrtFieldInfos *fis = frt_fis_alloc();
388
+ return frt_fis_init(fis, store, compression, index, term_vector);
389
+ }
390
+
391
+ FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi) {
354
392
  if (fis->size == fis->capa) {
355
393
  fis->capa <<= 1;
356
394
  FRT_REALLOC_N(fis->fields, FrtFieldInfo *, fis->capa);
@@ -364,23 +402,20 @@ FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi)
364
402
  return fi;
365
403
  }
366
404
 
367
- FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, FrtSymbol name)
368
- {
405
+ FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name) {
369
406
  return (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
370
407
  }
371
408
 
372
- int frt_fis_get_field_num(FrtFieldInfos *fis, FrtSymbol name)
373
- {
409
+ int frt_fis_get_field_num(FrtFieldInfos *fis, ID name) {
374
410
  FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
375
411
  if (fi) { return fi->number; }
376
412
  else { return -1; }
377
413
  }
378
414
 
379
- FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, FrtSymbol name)
380
- {
415
+ FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name) {
381
416
  FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
382
417
  if (!fi) {
383
- fi = (FrtFieldInfo*)frt_fi_new(name, fis->store, fis->index, fis->term_vector);
418
+ fi = (FrtFieldInfo*)frt_fi_new(name, fis->store, fis->compression, fis->index, fis->term_vector);
384
419
  frt_fis_add_field(fis, fi);
385
420
  }
386
421
  return fi;
@@ -392,16 +427,14 @@ FrtFieldInfos *frt_fis_read(FrtInStream *is)
392
427
  char *field_name;
393
428
  FRT_TRY
394
429
  do {
395
- FrtStoreValue store_val;
396
- FrtIndexValue index_val;
397
430
  FrtTermVectorValue term_vector_val;
398
431
  volatile int i;
399
432
  union { frt_u32 i; float f; } tmp;
400
433
  FrtFieldInfo *volatile fi;
401
- store_val = (FrtStoreValue)frt_is_read_vint(is);
402
- index_val = (FrtIndexValue)frt_is_read_vint(is);
434
+ FrtStoreValue store_val = (FrtStoreValue)frt_is_read_vint(is);
435
+ FrtIndexValue index_val = (FrtIndexValue)frt_is_read_vint(is);
403
436
  term_vector_val = (FrtTermVectorValue)frt_is_read_vint(is);
404
- fis = frt_fis_new(store_val, index_val, term_vector_val);
437
+ fis = frt_fis_new(store_val, FRT_COMPRESSION_NONE, index_val, term_vector_val); // TODO compression, read from store?
405
438
  for (i = frt_is_read_vint(is); i > 0; i--) {
406
439
  fi = FRT_ALLOC_AND_ZERO(FrtFieldInfo);
407
440
  FRT_TRY
@@ -803,8 +836,7 @@ static char *sis_next_seg_file_name(char *buf, FrtStore *store)
803
836
 
804
837
  #define GEN_FILE_RETRY_COUNT 10
805
838
  #define GEN_LOOK_AHEAD_COUNT 10
806
- static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf,
807
- void (*run)(FrtStore *store, FindSegmentsFile *fsf))
839
+ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void (*run)(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir), FrtIndexReader *ir)
808
840
  {
809
841
  volatile int i;
810
842
  volatile int gen_look_ahead_count = 0;
@@ -911,7 +943,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf,
911
943
  last_gen = gen;
912
944
  FRT_TRY
913
945
  fsf->generation = gen;
914
- run(store, fsf);
946
+ run(store, fsf, ir);
915
947
  FRT_RETURN_EARLY();
916
948
  return;
917
949
  case FRT_IO_ERROR: case FRT_FILE_NOT_FOUND_ERROR: case FRT_EOF_ERROR:
@@ -957,7 +989,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf,
957
989
  * prevSegmentFileName + "'" */
958
990
  FRT_TRY
959
991
  fsf->generation = gen - 1;
960
- run(store, fsf);
992
+ run(store, fsf, ir);
961
993
  /* TODO:LOG "success on fallback " +
962
994
  * prev_seg_file_name */
963
995
 
@@ -1040,7 +1072,7 @@ void frt_sis_del_from_to(FrtSegmentInfos *sis, int from, int to)
1040
1072
  }
1041
1073
  }
1042
1074
 
1043
- static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf)
1075
+ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_)
1044
1076
  {
1045
1077
  int seg_cnt;
1046
1078
  int i;
@@ -1079,7 +1111,7 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf)
1079
1111
  FrtSegmentInfos *frt_sis_read(FrtStore *store)
1080
1112
  {
1081
1113
  FindSegmentsFile fsf;
1082
- sis_find_segments_file(store, &fsf, &frt_sis_read_i);
1114
+ sis_find_segments_file(store, &fsf, &frt_sis_read_i, NULL);
1083
1115
  return fsf.ret.sis;
1084
1116
  }
1085
1117
 
@@ -1119,7 +1151,7 @@ void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter)
1119
1151
  }
1120
1152
  }
1121
1153
 
1122
- static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf)
1154
+ static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_)
1123
1155
  {
1124
1156
  FrtInStream *is;
1125
1157
  frt_u64 version;
@@ -1142,7 +1174,7 @@ static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf)
1142
1174
  frt_u64 frt_sis_read_current_version(FrtStore *store)
1143
1175
  {
1144
1176
  FindSegmentsFile fsf;
1145
- sis_find_segments_file(store, &fsf, &frt_sis_read_ver_i);
1177
+ sis_find_segments_file(store, &fsf, &frt_sis_read_ver_i, NULL);
1146
1178
  return fsf.ret.uint64;
1147
1179
  }
1148
1180
 
@@ -1152,18 +1184,17 @@ frt_u64 frt_sis_read_current_version(FrtStore *store)
1152
1184
  *
1153
1185
  ****************************************************************************/
1154
1186
 
1155
- static FrtLazyDocField *lazy_df_new(FrtSymbol name, const int size, bool is_compressed)
1156
- {
1187
+ static FrtLazyDocField *lazy_df_new(ID name, const int size, FrtCompressionType compression) {
1157
1188
  FrtLazyDocField *self = FRT_ALLOC(FrtLazyDocField);
1158
1189
  self->name = name;
1159
1190
  self->size = size;
1160
1191
  self->data = FRT_ALLOC_AND_ZERO_N(FrtLazyDocFieldData, size);
1161
- self->is_compressed = is_compressed;
1192
+ self->compression = compression;
1193
+ self->decompressed = false;
1162
1194
  return self;
1163
1195
  }
1164
1196
 
1165
- static void lazy_df_destroy(FrtLazyDocField *self)
1166
- {
1197
+ static void lazy_df_destroy(FrtLazyDocField *self) {
1167
1198
  int i;
1168
1199
  for (i = self->size - 1; i >= 0; i--) {
1169
1200
  if (self->data[i].text) {
@@ -1174,16 +1205,14 @@ static void lazy_df_destroy(FrtLazyDocField *self)
1174
1205
  free(self);
1175
1206
  }
1176
1207
 
1177
- static void comp_raise()
1178
- {
1208
+ static void comp_raise(void) {
1179
1209
  FRT_RAISE(EXCEPTION, "Compression error");
1180
1210
  }
1181
1211
 
1182
- static char *is_read_compressed_bytes(FrtInStream *is, int compressed_len, int *len)
1183
- {
1212
+ static char *is_read_brotli_compressed_bytes(FrtInStream *is, int compressed_len, int *len) {
1184
1213
  int buf_out_idx = 0;
1185
1214
  int read_len;
1186
- frt_uchar buf_in[COMPRESSION_BUFFER_SIZE];
1215
+ frt_uchar buf_in[FRT_COMPRESSION_BUFFER_SIZE];
1187
1216
  const frt_uchar *next_in;
1188
1217
  size_t available_in;
1189
1218
  frt_uchar *buf_out = NULL;
@@ -1195,20 +1224,20 @@ static char *is_read_compressed_bytes(FrtInStream *is, int compressed_len, int *
1195
1224
  if (!b_state) { comp_raise(); return NULL; }
1196
1225
 
1197
1226
  do {
1198
- read_len = compressed_len > COMPRESSION_BUFFER_SIZE ? COMPRESSION_BUFFER_SIZE : compressed_len;
1227
+ read_len = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
1199
1228
  frt_is_read_bytes(is, buf_in, read_len);
1200
1229
  compressed_len -= read_len;
1201
1230
  available_in = read_len;
1202
1231
  next_in = buf_in;
1203
- available_out = COMPRESSION_BUFFER_SIZE;
1232
+ available_out = FRT_COMPRESSION_BUFFER_SIZE;
1204
1233
  do {
1205
- FRT_REALLOC_N(buf_out, frt_uchar, buf_out_idx + COMPRESSION_BUFFER_SIZE);
1234
+ FRT_REALLOC_N(buf_out, frt_uchar, buf_out_idx + FRT_COMPRESSION_BUFFER_SIZE);
1206
1235
  next_out = buf_out + buf_out_idx;
1207
1236
  b_result = BrotliDecoderDecompressStream(b_state,
1208
1237
  &available_in, &next_in,
1209
1238
  &available_out, &next_out, NULL);
1210
1239
  if (b_result == BROTLI_DECODER_RESULT_ERROR) { comp_raise(); return NULL; }
1211
- buf_out_idx += COMPRESSION_BUFFER_SIZE - available_out;
1240
+ buf_out_idx += FRT_COMPRESSION_BUFFER_SIZE - available_out;
1212
1241
  } while (b_result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT);
1213
1242
  } while (b_result != BROTLI_DECODER_RESULT_SUCCESS && compressed_len > 0);
1214
1243
 
@@ -1220,16 +1249,180 @@ static char *is_read_compressed_bytes(FrtInStream *is, int compressed_len, int *
1220
1249
  return (char *)buf_out;
1221
1250
  }
1222
1251
 
1223
- char *frt_lazy_df_get_data(FrtLazyDocField *self, int i)
1224
- {
1252
+ static void zraise(int ret) {
1253
+ switch (ret) {
1254
+ case BZ_IO_ERROR:
1255
+ if (ferror(stdin))
1256
+ FRT_RAISE(FRT_IO_ERROR, "bzlib: error reading stdin");
1257
+ if (ferror(stdout))
1258
+ FRT_RAISE(FRT_IO_ERROR, "bzlib: error writing stdout");
1259
+ break;
1260
+ case BZ_CONFIG_ERROR:
1261
+ FRT_RAISE(FRT_IO_ERROR, "bzlib: system configuration error");
1262
+ break;
1263
+ case BZ_SEQUENCE_ERROR: /* shouldn't occur if code is correct */
1264
+ FRT_RAISE(FRT_IO_ERROR, "bzlib: !!BUG!! sequence error");
1265
+ break;
1266
+ case BZ_PARAM_ERROR: /* shouldn't occur if code is correct */
1267
+ FRT_RAISE(FRT_IO_ERROR, "bzlib: !!BUG!! parameter error");
1268
+ break;
1269
+ case BZ_MEM_ERROR:
1270
+ FRT_RAISE(FRT_IO_ERROR, "bzlib: memory error");
1271
+ break;
1272
+ case BZ_DATA_ERROR:
1273
+ FRT_RAISE(FRT_IO_ERROR, "bzlib: data integrity check error");
1274
+ break;
1275
+ case BZ_DATA_ERROR_MAGIC:
1276
+ FRT_RAISE(FRT_IO_ERROR, "bzlib: data integrity check - non-matching magic");
1277
+ break;
1278
+ case BZ_UNEXPECTED_EOF:
1279
+ FRT_RAISE(FRT_IO_ERROR, "bzlib: unexpected end-of-file");
1280
+ break;
1281
+ case BZ_OUTBUFF_FULL:
1282
+ FRT_RAISE(FRT_IO_ERROR, "bzlib: output buffer full");
1283
+ break;
1284
+ default:
1285
+ FRT_RAISE(FRT_EXCEPTION, "bzlib: unknown error");
1286
+ }
1287
+ }
1288
+
1289
+ static char *is_read_bz2_compressed_bytes(FrtInStream *is, int compressed_len, int *len) {
1290
+ int buf_out_idx = 0, ret, read_len;
1291
+ char *buf_out = NULL;
1292
+ char buf_in[FRT_COMPRESSION_BUFFER_SIZE];
1293
+ bz_stream zstrm;
1294
+ zstrm.bzalloc = NULL;
1295
+ zstrm.bzfree = NULL;
1296
+ zstrm.opaque = NULL;
1297
+ zstrm.next_in = NULL;
1298
+ zstrm.avail_in = 0;
1299
+ if ((ret = BZ2_bzDecompressInit(&zstrm, 0, 0)) != BZ_OK) zraise(ret);
1300
+
1301
+ do {
1302
+ read_len = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
1303
+ frt_is_read_bytes(is, (frt_uchar *)buf_in, read_len);
1304
+ compressed_len -= read_len;
1305
+ zstrm.avail_in = read_len;
1306
+ zstrm.next_in = buf_in;
1307
+ zstrm.avail_out = FRT_COMPRESSION_BUFFER_SIZE;
1308
+
1309
+ do {
1310
+ REALLOC_N(buf_out, char, buf_out_idx + FRT_COMPRESSION_BUFFER_SIZE);
1311
+ zstrm.next_out = buf_out + buf_out_idx;
1312
+ ret = BZ2_bzDecompress(&zstrm);
1313
+ assert(ret != BZ_SEQUENCE_ERROR); /* state not clobbered */
1314
+ if (ret != BZ_OK && ret != BZ_STREAM_END) {
1315
+ (void)BZ2_bzDecompressEnd(&zstrm);
1316
+ zraise(ret);
1317
+ }
1318
+ buf_out_idx += FRT_COMPRESSION_BUFFER_SIZE - zstrm.avail_out;
1319
+ } while (zstrm.avail_out == 0);
1320
+ } while (ret != BZ_STREAM_END && compressed_len != 0);
1321
+
1322
+ (void)BZ2_bzDecompressEnd(&zstrm);
1323
+
1324
+ FRT_REALLOC_N(buf_out, char, buf_out_idx + 1);
1325
+ buf_out[buf_out_idx] = '\0';
1326
+
1327
+ *len = buf_out_idx;
1328
+ return (char *)buf_out;
1329
+ }
1330
+
1331
+ static char *is_read_lz4_compressed_bytes(FrtInStream *is, int compressed_len, int *length) {
1332
+ frt_uchar buf_in[FRT_COMPRESSION_BUFFER_SIZE];
1333
+ char *buf_out = NULL;
1334
+ int dc_length = 0;
1335
+ LZ4F_dctx *dctx;
1336
+ LZ4F_frameInfo_t frame_info;
1337
+ LZ4F_errorCode_t dctx_status = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION);
1338
+ if (LZ4F_isError(dctx_status)) { *length = -1; return NULL; }
1339
+
1340
+ /* header and buffer */
1341
+ int read_length = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
1342
+ frt_is_read_bytes(is, buf_in, read_length);
1343
+ compressed_len -= read_length;
1344
+
1345
+ size_t consumed_size = read_length;
1346
+ size_t res = LZ4F_getFrameInfo(dctx, &frame_info, buf_in, &consumed_size);
1347
+ if (LZ4F_isError(res)) { *length = -1; return NULL; }
1348
+ size_t buf_out_length;
1349
+ switch(frame_info.blockSizeID) {
1350
+ case LZ4F_default:
1351
+ case LZ4F_max64KB:
1352
+ buf_out_length = 1 << 16;
1353
+ break;
1354
+ case LZ4F_max256KB:
1355
+ buf_out_length = 1 << 18;
1356
+ break;
1357
+ case LZ4F_max1MB:
1358
+ buf_out_length = 1 << 20;
1359
+ break;
1360
+ case LZ4F_max4MB:
1361
+ buf_out_length = 1 << 22;
1362
+ break;
1363
+ default:
1364
+ buf_out_length = 0;
1365
+ }
1366
+
1367
+ res = 1;
1368
+ int first_chunk = 1;
1369
+
1370
+ /* decompress data */
1371
+ while (res != 0) {
1372
+ if (!first_chunk) {
1373
+ read_length = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
1374
+ frt_is_read_bytes(is, buf_in, read_length);
1375
+ compressed_len -= read_length;
1376
+ consumed_size = 0;
1377
+ }
1378
+ first_chunk = 0;
1379
+
1380
+ char *src = (char *)(buf_in + consumed_size);
1381
+ char *src_end = (char *)buf_in + read_length;
1382
+
1383
+ while (src < src_end && res != 0){
1384
+ size_t dest_length = buf_out_length;
1385
+ size_t consumed_size = read_length;
1386
+ FRT_REALLOC_N(buf_out, char, dc_length + buf_out_length);
1387
+ res = LZ4F_decompress(dctx, buf_out + dc_length, &dest_length, src, &consumed_size, NULL);
1388
+ if (LZ4F_isError(res)) { *length = -1; return NULL; }
1389
+ dc_length += dest_length;
1390
+ src = src + consumed_size;
1391
+ }
1392
+ }
1393
+
1394
+ /* finish up */
1395
+ LZ4F_freeDecompressionContext(dctx);
1396
+
1397
+ FRT_REALLOC_N(buf_out, char, dc_length + 1);
1398
+ buf_out[dc_length] = '\0';
1399
+
1400
+ *length = dc_length;
1401
+ return buf_out;
1402
+ }
1403
+
1404
+ static char *is_read_compressed_bytes(FrtInStream *is, int compressed_len, int *len, FrtCompressionType compression) {
1405
+ switch (compression) {
1406
+ case FRT_COMPRESSION_BROTLI:
1407
+ return is_read_brotli_compressed_bytes(is, compressed_len, len);
1408
+ case FRT_COMPRESSION_BZ2:
1409
+ return is_read_bz2_compressed_bytes(is, compressed_len, len);
1410
+ case FRT_COMPRESSION_LZ4:
1411
+ return is_read_lz4_compressed_bytes(is, compressed_len, len);
1412
+ default:
1413
+ return NULL;
1414
+ }
1415
+ }
1416
+
1417
+ char *frt_lazy_df_get_data(FrtLazyDocField *self, int i) {
1225
1418
  char *text = NULL;
1226
1419
  if (i < self->size && i >= 0) {
1227
1420
  text = self->data[i].text;
1228
1421
  if (NULL == text) {
1229
1422
  const int read_len = self->data[i].length + 1;
1230
1423
  frt_is_seek(self->doc->fields_in, self->data[i].start);
1231
- if (self->is_compressed) {
1232
- self->data[i].text = text = is_read_compressed_bytes(self->doc->fields_in, read_len, &(self->data[i].length));
1424
+ if (self->data[i].compression != FRT_COMPRESSION_NONE) {
1425
+ self->data[i].text = text = is_read_compressed_bytes(self->doc->fields_in, read_len, &(self->data[i].length), self->data[i].compression);
1233
1426
  } else {
1234
1427
  self->data[i].text = text = FRT_ALLOC_N(char, read_len);
1235
1428
  frt_is_read_bytes(self->doc->fields_in, (frt_uchar *)text, read_len);
@@ -1241,9 +1434,8 @@ char *frt_lazy_df_get_data(FrtLazyDocField *self, int i)
1241
1434
  return text;
1242
1435
  }
1243
1436
 
1244
- void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf, int start, int len)
1245
- {
1246
- if (self->is_compressed == 1) {
1437
+ void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf, int start, int len) {
1438
+ if (self->compression != FRT_COMPRESSION_NONE && !self->decompressed) {
1247
1439
  int i;
1248
1440
  self->len = 0;
1249
1441
  for (i = self->size-1; i >= 0; i--) {
@@ -1251,7 +1443,7 @@ void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf, int start, int len)
1251
1443
  self->len += self->data[i].length + 1;
1252
1444
  }
1253
1445
  self->len--; /* each field separated by ' ' but no need to add to end */
1254
- self->is_compressed = 2;
1446
+ self->decompressed = true;
1255
1447
  }
1256
1448
  if (start < 0 || start >= self->len) {
1257
1449
  FRT_RAISE(FRT_IO_ERROR, "start out of range in LazyDocField#get_bytes. %d "
@@ -1264,7 +1456,7 @@ void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf, int start, int len)
1264
1456
  FRT_RAISE(FRT_IO_ERROR, "Tried to read past end of field. Field is only %d "
1265
1457
  "bytes long but tried to read to %d", self->len, start + len);
1266
1458
  }
1267
- if (self->is_compressed) {
1459
+ if (self->compression != FRT_COMPRESSION_NONE) {
1268
1460
  int cur_start = 0, buf_start = 0, cur_end, i, copy_start, copy_len;
1269
1461
  for (i = 0; i < self->size; i++) {
1270
1462
  cur_end = cur_start + self->data[i].length;
@@ -1328,21 +1520,17 @@ static void lazy_doc_add_field(FrtLazyDoc *self, FrtLazyDocField *lazy_df, int i
1328
1520
  lazy_df->doc = self;
1329
1521
  }
1330
1522
 
1331
- FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self, FrtSymbol field)
1332
- {
1523
+ FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self, ID field) {
1333
1524
  return (FrtLazyDocField *)frt_h_get(self->field_dictionary, (void *)field);
1334
1525
  }
1335
1526
 
1336
1527
  /****************************************************************************
1337
- *
1338
1528
  * FrtFieldsReader
1339
- *
1340
1529
  ****************************************************************************/
1341
1530
 
1342
1531
  #define FIELDS_IDX_PTR_SIZE 12
1343
1532
 
1344
- FrtFieldsReader *frt_fr_open(FrtStore *store, const char *segment, FrtFieldInfos *fis)
1345
- {
1533
+ FrtFieldsReader *frt_fr_open(FrtStore *store, const char *segment, FrtFieldInfos *fis) {
1346
1534
  FrtFieldsReader *fr = FRT_ALLOC(FrtFieldsReader);
1347
1535
  FrtInStream *fdx_in;
1348
1536
  char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
@@ -1362,8 +1550,7 @@ FrtFieldsReader *frt_fr_open(FrtStore *store, const char *segment, FrtFieldInfos
1362
1550
  return fr;
1363
1551
  }
1364
1552
 
1365
- FrtFieldsReader *frt_fr_clone(FrtFieldsReader *orig)
1366
- {
1553
+ FrtFieldsReader *frt_fr_clone(FrtFieldsReader *orig) {
1367
1554
  FrtFieldsReader *fr = FRT_ALLOC(FrtFieldsReader);
1368
1555
 
1369
1556
  memcpy(fr, orig, sizeof(FrtFieldsReader));
@@ -1373,35 +1560,33 @@ FrtFieldsReader *frt_fr_clone(FrtFieldsReader *orig)
1373
1560
  return fr;
1374
1561
  }
1375
1562
 
1376
- void frt_fr_close(FrtFieldsReader *fr)
1377
- {
1563
+ void frt_fr_close(FrtFieldsReader *fr) {
1378
1564
  frt_is_close(fr->fdt_in);
1379
1565
  frt_is_close(fr->fdx_in);
1380
1566
  free(fr);
1381
1567
  }
1382
1568
 
1383
- static FrtDocField *frt_fr_df_new(FrtSymbol name, int size, bool is_compressed)
1384
- {
1569
+ static FrtDocField *frt_fr_df_new(ID name, int size, FrtCompressionType compression) {
1385
1570
  FrtDocField *df = FRT_ALLOC(FrtDocField);
1386
1571
  df->name = name;
1387
1572
  df->capa = df->size = size;
1388
1573
  df->data = FRT_ALLOC_N(char *, df->capa);
1389
1574
  df->lengths = FRT_ALLOC_N(int, df->capa);
1575
+ df->encodings = FRT_ALLOC_N(rb_encoding *, df->capa);
1390
1576
  df->destroy_data = true;
1391
1577
  df->boost = 1.0f;
1392
- df->is_compressed = is_compressed;
1578
+ df->compression = compression;
1393
1579
  return df;
1394
1580
  }
1395
1581
 
1396
- static void frt_fr_read_compressed_fields(FrtFieldsReader *fr, FrtDocField *df)
1397
- {
1582
+ static void frt_fr_read_compressed_fields(FrtFieldsReader *fr, FrtDocField *df, FrtCompressionType compression) {
1398
1583
  int i;
1399
1584
  const int df_size = df->size;
1400
1585
  FrtInStream *fdt_in = fr->fdt_in;
1401
1586
 
1402
1587
  for (i = 0; i < df_size; i++) {
1403
1588
  const int compressed_len = df->lengths[i] + 1;
1404
- df->data[i] = is_read_compressed_bytes(fdt_in, compressed_len, &(df->lengths[i]));
1589
+ df->data[i] = is_read_compressed_bytes(fdt_in, compressed_len, &(df->lengths[i]), compression);
1405
1590
  }
1406
1591
  }
1407
1592
 
@@ -1423,18 +1608,20 @@ FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
1423
1608
  const int field_num = frt_is_read_vint(fdt_in);
1424
1609
  FrtFieldInfo *fi = fr->fis->fields[field_num];
1425
1610
  const int df_size = frt_is_read_vint(fdt_in);
1426
- FrtDocField *df = frt_fr_df_new(fi->name, df_size, fi_is_compressed(fi));
1611
+ FrtDocField *df = frt_fr_df_new(fi->name, df_size, frt_fi_get_compression(fi));
1427
1612
 
1428
1613
  for (j = 0; j < df_size; j++) {
1429
1614
  df->lengths[j] = frt_is_read_vint(fdt_in);
1615
+ df->encodings[j] = rb_enc_from_index(frt_is_read_vint(fdt_in));
1616
+ df->compression = frt_is_read_vint(fdt_in);
1430
1617
  }
1431
1618
 
1432
1619
  frt_doc_add_field(doc, df);
1433
1620
  }
1434
1621
  for (i = 0; i < stored_cnt; i++) {
1435
1622
  FrtDocField *df = doc->fields[i];
1436
- if (df->is_compressed) {
1437
- frt_fr_read_compressed_fields(fr, df);
1623
+ if (df->compression != FRT_COMPRESSION_NONE) {
1624
+ frt_fr_read_compressed_fields(fr, df, df->compression);
1438
1625
  } else {
1439
1626
  const int df_size = df->size;
1440
1627
  for (j = 0; j < df_size; j++) {
@@ -1458,31 +1645,37 @@ FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num)
1458
1645
  FrtLazyDoc *lazy_doc;
1459
1646
  FrtInStream *fdx_in = fr->fdx_in;
1460
1647
  FrtInStream *fdt_in = fr->fdt_in;
1648
+
1461
1649
  frt_is_seek(fdx_in, doc_num * FIELDS_IDX_PTR_SIZE);
1462
1650
  pos = (off_t)frt_is_read_u64(fdx_in);
1463
1651
  frt_is_seek(fdt_in, pos);
1464
1652
  stored_cnt = frt_is_read_vint(fdt_in);
1653
+
1465
1654
  lazy_doc = lazy_doc_new(stored_cnt, fdt_in);
1466
1655
  for (i = 0; i < stored_cnt; i++) {
1467
1656
  FrtFieldInfo *fi = fr->fis->fields[frt_is_read_vint(fdt_in)];
1468
- const int data_cnt = frt_is_read_vint(fdt_in);
1469
- FrtLazyDocField *lazy_df = lazy_df_new(fi->name, data_cnt, fi_is_compressed(fi));
1657
+ const int df_size = frt_is_read_vint(fdt_in);
1658
+ FrtLazyDocField *lazy_df = lazy_df_new(fi->name, df_size, frt_fi_get_compression(fi));
1470
1659
  const int field_start = start;
1471
1660
  /* get the starts relative positions this time around */
1472
- for (j = 0; j < data_cnt; j++) {
1661
+
1662
+ for (j = 0; j < df_size; j++) {
1473
1663
  lazy_df->data[j].start = start;
1474
1664
  start += 1 + (lazy_df->data[j].length = frt_is_read_vint(fdt_in));
1665
+ lazy_df->data[j].encoding = rb_enc_from_index(frt_is_read_vint(fdt_in));
1666
+ lazy_df->data[j].compression = frt_is_read_vint(fdt_in);
1475
1667
  }
1668
+
1476
1669
  lazy_df->len = start - field_start - 1;
1477
1670
  lazy_doc_add_field(lazy_doc, lazy_df, i);
1478
1671
  }
1479
1672
  /* correct the starts to their correct absolute positions */
1673
+ const off_t abs_start = frt_is_pos(fdt_in);
1480
1674
  for (i = 0; i < stored_cnt; i++) {
1481
1675
  FrtLazyDocField *lazy_df = lazy_doc->fields[i];
1482
- const int data_cnt = lazy_df->size;
1483
- const off_t start = frt_is_pos(fdt_in);
1484
- for (j = 0; j < data_cnt; j++) {
1485
- lazy_df->data[j].start += start;
1676
+ const int df_size = lazy_df->size;
1677
+ for (j = 0; j < df_size; j++) {
1678
+ lazy_df->data[j].start += abs_start;
1486
1679
  }
1487
1680
  }
1488
1681
 
@@ -1660,42 +1853,150 @@ void frt_fw_close(FrtFieldsWriter *fw)
1660
1853
  free(fw);
1661
1854
  }
1662
1855
 
1663
- static int frt_os_write_compressed_bytes(FrtOutStream* out_stream, frt_uchar *data, int length)
1664
- {
1665
- size_t compressed_len = 0;
1856
+ static int frt_os_write_brotli_compressed_bytes(FrtOutStream* out_stream, frt_uchar *data, int length) {
1857
+ size_t compressed_length = 0;
1666
1858
  const frt_uchar *next_in = data;
1667
1859
  size_t available_in = length;
1668
1860
  size_t available_out;
1669
- frt_uchar compression_buffer[COMPRESSION_BUFFER_SIZE];
1861
+ frt_uchar compression_buffer[FRT_COMPRESSION_BUFFER_SIZE];
1670
1862
  frt_uchar *next_out;
1671
1863
  BrotliEncoderState *b_state = BrotliEncoderCreateInstance(NULL, NULL, NULL);
1672
1864
  if (!b_state) { comp_raise(); return -1; }
1673
1865
 
1674
- BrotliEncoderSetParameter(b_state, BROTLI_PARAM_QUALITY, COMPRESSION_LEVEL);
1866
+ BrotliEncoderSetParameter(b_state, BROTLI_PARAM_QUALITY, FRT_BROTLI_COMPRESSION_LEVEL);
1675
1867
 
1676
1868
  do {
1677
- available_out = COMPRESSION_BUFFER_SIZE;
1869
+ available_out = FRT_COMPRESSION_BUFFER_SIZE;
1678
1870
  next_out = compression_buffer;
1679
1871
  if (!BrotliEncoderCompressStream(b_state, BROTLI_OPERATION_FINISH,
1680
1872
  &available_in, &next_in,
1681
- &available_out, &next_out, &compressed_len)) {
1873
+ &available_out, &next_out, &compressed_length)) {
1682
1874
  BrotliEncoderDestroyInstance(b_state);
1683
1875
  comp_raise();
1684
1876
  return -1;
1685
1877
  }
1686
- frt_os_write_bytes(out_stream, compression_buffer, COMPRESSION_BUFFER_SIZE - available_out);
1878
+ frt_os_write_bytes(out_stream, compression_buffer, FRT_COMPRESSION_BUFFER_SIZE - available_out);
1687
1879
  } while (!BrotliEncoderIsFinished(b_state));
1688
1880
 
1689
1881
  BrotliEncoderDestroyInstance(b_state);
1690
- // fprintf(stderr, "Compressed: %i -> %i\n", length, (int)compressed_len);
1691
- return (int)compressed_len;
1882
+
1883
+ return (int)compressed_length;
1692
1884
  }
1693
1885
 
1694
- void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc)
1695
- {
1886
+ static int frt_os_write_bz2_compressed_bytes(FrtOutStream* out_stream, frt_uchar *data, int length) {
1887
+ int ret, buf_size, compressed_len = 0;
1888
+ char out_buffer[FRT_COMPRESSION_BUFFER_SIZE];
1889
+ bz_stream zstrm;
1890
+ zstrm.bzalloc = NULL;
1891
+ zstrm.bzfree = NULL;
1892
+ zstrm.opaque = NULL;
1893
+ if ((ret = BZ2_bzCompressInit(&zstrm, FRT_BZIP_COMPRESSION_LEVEL, 0, 0)) != BZ_OK) zraise(ret);
1894
+
1895
+ zstrm.avail_in = length;
1896
+ zstrm.next_in = (char *)data;
1897
+ zstrm.avail_out = FRT_COMPRESSION_BUFFER_SIZE;
1898
+ zstrm.next_out = out_buffer;
1899
+
1900
+ do {
1901
+ ret = BZ2_bzCompress(&zstrm, BZ_FINISH); /* no bad return value */
1902
+ assert(ret != BZ_SEQUENCE_ERROR); /* state not clobbered */
1903
+ compressed_len += buf_size = FRT_COMPRESSION_BUFFER_SIZE - zstrm.avail_out;
1904
+ frt_os_write_bytes(out_stream, (frt_uchar *)out_buffer, buf_size);
1905
+ } while (zstrm.avail_out == 0);
1906
+ assert(zstrm.avail_in == 0); /* all input will be used */
1907
+
1908
+ (void)BZ2_bzCompressEnd(&zstrm);
1909
+ return compressed_len;
1910
+ }
1911
+
1912
+ static const LZ4F_preferences_t lz4_prefs = {
1913
+ {
1914
+ LZ4F_default,
1915
+ LZ4F_blockLinked,
1916
+ LZ4F_noContentChecksum,
1917
+ LZ4F_frame,
1918
+ 0, /* unknown content size */
1919
+ 0, /* no dictID */
1920
+ LZ4F_noBlockChecksum
1921
+ },
1922
+ 0,
1923
+ 1,
1924
+ 1,
1925
+ {0,0,0}
1926
+ };
1927
+
1928
+ static int frt_os_write_lz4_compressed_bytes(FrtOutStream* out_stream, frt_uchar *data, int length) {
1929
+ int compressed_length = 0;
1930
+ int remaining_length = length;
1931
+ size_t ccmp_length = 0;
1932
+ LZ4F_compressionContext_t ctx;
1933
+ size_t out_buf_length = LZ4F_compressBound(FRT_COMPRESSION_BUFFER_SIZE, &lz4_prefs);
1934
+ frt_uchar *out_buf = frt_ecalloc(out_buf_length);
1935
+
1936
+ size_t ctx_creation = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
1937
+ if (LZ4F_isError(ctx_creation)) {
1938
+ compressed_length = -1;
1939
+ goto finish;
1940
+ }
1941
+
1942
+ /* create header */
1943
+ ccmp_length = LZ4F_compressBegin(ctx, out_buf, out_buf_length, &lz4_prefs);
1944
+ if (LZ4F_isError(ccmp_length)) {
1945
+ compressed_length = -1;
1946
+ goto finish;
1947
+ }
1948
+ compressed_length = ccmp_length;
1949
+ frt_os_write_bytes(out_stream, out_buf, ccmp_length);
1950
+
1951
+ /* compress data */
1952
+ do {
1953
+ int read_length = (FRT_COMPRESSION_BUFFER_SIZE > remaining_length) ? remaining_length : FRT_COMPRESSION_BUFFER_SIZE;
1954
+ ccmp_length = LZ4F_compressUpdate(ctx, out_buf, out_buf_length, data + (length - remaining_length), read_length, NULL);
1955
+ if (LZ4F_isError(ccmp_length)) {
1956
+ compressed_length = -1;
1957
+ goto finish;
1958
+ }
1959
+ frt_os_write_bytes(out_stream, out_buf, ccmp_length);
1960
+ compressed_length += ccmp_length;
1961
+ remaining_length -= read_length;
1962
+ } while (remaining_length > 0);
1963
+
1964
+ /* finish up */
1965
+ ccmp_length = LZ4F_compressEnd(ctx, out_buf, out_buf_length, NULL);
1966
+ if (LZ4F_isError(ccmp_length)) {
1967
+ compressed_length = -1;
1968
+ goto finish;
1969
+ }
1970
+
1971
+ frt_os_write_bytes(out_stream, out_buf, ccmp_length);
1972
+ compressed_length += ccmp_length;
1973
+
1974
+ finish:
1975
+ LZ4F_freeCompressionContext(ctx);
1976
+ free(out_buf);
1977
+
1978
+ return compressed_length;
1979
+ }
1980
+
1981
+ static int frt_os_write_compressed_bytes(FrtOutStream* out_stream, frt_uchar *data, int length, FrtCompressionType compression) {
1982
+ switch (compression) {
1983
+ case FRT_COMPRESSION_BROTLI:
1984
+ return frt_os_write_brotli_compressed_bytes(out_stream, data, length);
1985
+ case FRT_COMPRESSION_BZ2:
1986
+ return frt_os_write_bz2_compressed_bytes(out_stream, data, length);
1987
+ case FRT_COMPRESSION_LZ4:
1988
+ return frt_os_write_lz4_compressed_bytes(out_stream, data, length);
1989
+ default:
1990
+ return -1;
1991
+ }
1992
+
1993
+ }
1994
+
1995
+ void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc) {
1696
1996
  int i, j, stored_cnt = 0;
1697
1997
  FrtDocField *df;
1698
1998
  FrtFieldInfo *fi;
1999
+ FrtCompressionType compression;
1699
2000
  FrtOutStream *fdt_out = fw->fdt_out, *fdx_out = fw->fdx_out;
1700
2001
  const int doc_size = doc->size;
1701
2002
 
@@ -1719,16 +2020,22 @@ void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc)
1719
2020
  const int df_size = df->size;
1720
2021
  frt_os_write_vint(fdt_out, fi->number);
1721
2022
  frt_os_write_vint(fdt_out, df_size);
2023
+
1722
2024
  if (fi_is_compressed(fi)) {
2025
+ compression = frt_fi_get_compression(fi);
1723
2026
  for (j = 0; j < df_size; j++) {
1724
2027
  const int length = df->lengths[j];
1725
- int compressed_len = frt_os_write_compressed_bytes(fw->buffer, (frt_uchar*)df->data[j], length);
2028
+ int compressed_len = frt_os_write_compressed_bytes(fw->buffer, (frt_uchar*)df->data[j], length, compression);
1726
2029
  frt_os_write_vint(fdt_out, compressed_len - 1);
2030
+ frt_os_write_vint(fdt_out, rb_enc_to_index(df->encodings[j]));
2031
+ frt_os_write_vint(fdt_out, compression);
1727
2032
  }
1728
2033
  } else {
1729
2034
  for (j = 0; j < df_size; j++) {
1730
2035
  const int length = df->lengths[j];
1731
2036
  frt_os_write_vint(fdt_out, length);
2037
+ frt_os_write_vint(fdt_out, rb_enc_to_index(df->encodings[j]));
2038
+ frt_os_write_vint(fdt_out, FRT_COMPRESSION_NONE);
1732
2039
  frt_os_write_bytes(fw->buffer, (frt_uchar*)df->data[j], length);
1733
2040
  /* leave a space between fields as that is how they are analyzed */
1734
2041
  frt_os_write_byte(fw->buffer, ' ');
@@ -2087,8 +2394,7 @@ static char *ste_scan_to(FrtTermEnum *te, const char *term)
2087
2394
  }
2088
2395
  }
2089
2396
 
2090
- static FrtSegmentTermEnum *ste_allocate()
2091
- {
2397
+ static FrtSegmentTermEnum *ste_allocate(void) {
2092
2398
  FrtSegmentTermEnum *ste = FRT_ALLOC_AND_ZERO(FrtSegmentTermEnum);
2093
2399
 
2094
2400
  TE(ste)->next = &ste_next;
@@ -2113,7 +2419,6 @@ void frt_ste_close(FrtTermEnum *te)
2113
2419
  free(te);
2114
2420
  }
2115
2421
 
2116
-
2117
2422
  static char *frt_ste_get_term(FrtTermEnum *te, int pos)
2118
2423
  {
2119
2424
  FrtSegmentTermEnum *ste = STE(te);
@@ -2228,9 +2533,7 @@ static void tew_destroy(TermEnumWrapper *tew)
2228
2533
  tew->te->close(tew->te);
2229
2534
  }
2230
2535
 
2231
- static TermEnumWrapper *tew_setup(TermEnumWrapper *tew, int index, FrtTermEnum *te,
2232
- FrtIndexReader *ir)
2233
- {
2536
+ static TermEnumWrapper *tew_setup(TermEnumWrapper *tew, int index, FrtTermEnum *te, FrtIndexReader *ir) {
2234
2537
  tew->index = index;
2235
2538
  tew->ir = ir;
2236
2539
  tew->te = te;
@@ -2239,9 +2542,7 @@ static TermEnumWrapper *tew_setup(TermEnumWrapper *tew, int index, FrtTermEnum *
2239
2542
  return tew;
2240
2543
  }
2241
2544
 
2242
-
2243
- static char *mte_next(FrtTermEnum *te)
2244
- {
2545
+ static char *mte_next(FrtTermEnum *te) {
2245
2546
  TermEnumWrapper *top =
2246
2547
  (TermEnumWrapper *)frt_pq_top(MTE(te)->tew_queue);
2247
2548
 
@@ -2271,8 +2572,7 @@ static char *mte_next(FrtTermEnum *te)
2271
2572
  return te->curr_term;
2272
2573
  }
2273
2574
 
2274
- static FrtTermEnum *mte_set_field(FrtTermEnum *te, int field_num)
2275
- {
2575
+ static FrtTermEnum *mte_set_field(FrtTermEnum *te, int field_num) {
2276
2576
  MultiTermEnum *mte = MTE(te);
2277
2577
  int i;
2278
2578
  const int size = mte->size;
@@ -2300,8 +2600,7 @@ static FrtTermEnum *mte_set_field(FrtTermEnum *te, int field_num)
2300
2600
  return te;
2301
2601
  }
2302
2602
 
2303
- static char *mte_skip_to(FrtTermEnum *te, const char *term)
2304
- {
2603
+ static char *mte_skip_to(FrtTermEnum *te, const char *term) {
2305
2604
  MultiTermEnum *mte = MTE(te);
2306
2605
  int i;
2307
2606
  const int size = mte->size;
@@ -2317,8 +2616,7 @@ static char *mte_skip_to(FrtTermEnum *te, const char *term)
2317
2616
  return mte_next(te);
2318
2617
  }
2319
2618
 
2320
- static void mte_close(FrtTermEnum *te)
2321
- {
2619
+ static void mte_close(FrtTermEnum *te) {
2322
2620
  int i;
2323
2621
  const int size = MTE(te)->size;
2324
2622
  for (i = 0; i < size; i++) {
@@ -2331,10 +2629,9 @@ static void mte_close(FrtTermEnum *te)
2331
2629
  free(te);
2332
2630
  }
2333
2631
 
2334
- FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *term)
2335
- {
2336
- FrtIndexReader **readers = mr->sub_readers;
2337
- int r_cnt = mr->r_cnt;
2632
+ FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *term) {
2633
+ FrtIndexReader **readers = mr->sub_readers;
2634
+ int r_cnt = mr->r_cnt;
2338
2635
  int i;
2339
2636
  FrtIndexReader *reader;
2340
2637
  MultiTermEnum *mte = FRT_ALLOC_AND_ZERO(MultiTermEnum);
@@ -2362,8 +2659,7 @@ FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *term)
2362
2659
 
2363
2660
  if (NULL != term) {
2364
2661
  sub_te = reader->terms_from(reader, fnum, term);
2365
- }
2366
- else {
2662
+ } else {
2367
2663
  sub_te = reader->terms(reader, fnum);
2368
2664
  }
2369
2665
 
@@ -2372,8 +2668,7 @@ FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *term)
2372
2668
  || (tew->term && (tew->term[0] != '\0'))) {
2373
2669
  frt_pq_push(mte->tew_queue, tew); /* initialize queue */
2374
2670
  }
2375
- }
2376
- else {
2671
+ } else {
2377
2672
  /* add the term_enum_wrapper just in case */
2378
2673
  sub_te = reader->terms(reader, 0);
2379
2674
  sub_te->field_num = -1;
@@ -2395,9 +2690,7 @@ FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *term)
2395
2690
  *
2396
2691
  ****************************************************************************/
2397
2692
 
2398
- FrtTermInfosReader *frt_tir_open(FrtStore *store,
2399
- FrtSegmentFieldIndex *sfi, const char *segment)
2400
- {
2693
+ FrtTermInfosReader *frt_tir_open(FrtStore *store, FrtSegmentFieldIndex *sfi, const char *segment) {
2401
2694
  FrtTermInfosReader *tir = FRT_ALLOC(FrtTermInfosReader);
2402
2695
  char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
2403
2696
 
@@ -2410,8 +2703,7 @@ FrtTermInfosReader *frt_tir_open(FrtStore *store,
2410
2703
  return tir;
2411
2704
  }
2412
2705
 
2413
- static FrtTermEnum *tir_enum(FrtTermInfosReader *tir)
2414
- {
2706
+ static FrtTermEnum *tir_enum(FrtTermInfosReader *tir) {
2415
2707
  FrtTermEnum *te;
2416
2708
  if (NULL == (te = (FrtTermEnum *)frt_thread_getspecific(tir->thread_te))) {
2417
2709
  te = frt_ste_clone(tir->orig_te);
@@ -2422,8 +2714,7 @@ static FrtTermEnum *tir_enum(FrtTermInfosReader *tir)
2422
2714
  return te;
2423
2715
  }
2424
2716
 
2425
- FrtTermInfosReader *frt_tir_set_field(FrtTermInfosReader *tir, int field_num)
2426
- {
2717
+ FrtTermInfosReader *frt_tir_set_field(FrtTermInfosReader *tir, int field_num) {
2427
2718
  if (field_num != tir->field_num) {
2428
2719
  ste_set_field(tir_enum(tir), field_num);
2429
2720
  tir->field_num = field_num;
@@ -2431,8 +2722,7 @@ FrtTermInfosReader *frt_tir_set_field(FrtTermInfosReader *tir, int field_num)
2431
2722
  return tir;
2432
2723
  }
2433
2724
 
2434
- FrtTermInfo *frt_tir_get_ti(FrtTermInfosReader *tir, const char *term)
2435
- {
2725
+ FrtTermInfo *frt_tir_get_ti(FrtTermInfosReader *tir, const char *term) {
2436
2726
  FrtTermEnum *te = tir_enum(tir);
2437
2727
  char *match;
2438
2728
 
@@ -2443,9 +2733,7 @@ FrtTermInfo *frt_tir_get_ti(FrtTermInfosReader *tir, const char *term)
2443
2733
  return NULL;
2444
2734
  }
2445
2735
 
2446
- static FrtTermInfo *tir_get_ti_field(FrtTermInfosReader *tir, int field_num,
2447
- const char *term)
2448
- {
2736
+ static FrtTermInfo *tir_get_ti_field(FrtTermInfosReader *tir, int field_num, const char *term) {
2449
2737
  FrtTermEnum *te = tir_enum(tir);
2450
2738
  char *match;
2451
2739
 
@@ -2461,19 +2749,16 @@ static FrtTermInfo *tir_get_ti_field(FrtTermInfosReader *tir, int field_num,
2461
2749
  return NULL;
2462
2750
  }
2463
2751
 
2464
- char *frt_tir_get_term(FrtTermInfosReader *tir, int pos)
2465
- {
2752
+ char *frt_tir_get_term(FrtTermInfosReader *tir, int pos) {
2466
2753
  if (pos < 0) {
2467
2754
  return NULL;
2468
- }
2469
- else {
2755
+ } else {
2470
2756
  return frt_ste_get_term(tir_enum(tir), pos);
2471
2757
  }
2472
2758
  }
2473
2759
 
2474
2760
 
2475
- void frt_tir_close(FrtTermInfosReader *tir)
2476
- {
2761
+ void frt_tir_close(FrtTermInfosReader *tir) {
2477
2762
  frt_ary_destroy(tir->te_bucket, (frt_free_ft)&frt_ste_close);
2478
2763
  frt_ste_close(tir->orig_te);
2479
2764
 
@@ -2490,25 +2775,19 @@ void frt_tir_close(FrtTermInfosReader *tir)
2490
2775
  *
2491
2776
  ****************************************************************************/
2492
2777
 
2493
- static FrtTermWriter *tw_new(FrtStore *store, char *file_name)
2494
- {
2778
+ static FrtTermWriter *tw_new(FrtStore *store, char *file_name) {
2495
2779
  FrtTermWriter *tw = FRT_ALLOC_AND_ZERO(FrtTermWriter);
2496
2780
  tw->os = store->new_output(store, file_name);
2497
2781
  tw->last_term = FRT_EMPTY_STRING;
2498
2782
  return tw;
2499
2783
  }
2500
2784
 
2501
- static void tw_close(FrtTermWriter *tw)
2502
- {
2785
+ static void tw_close(FrtTermWriter *tw) {
2503
2786
  frt_os_close(tw->os);
2504
2787
  free(tw);
2505
2788
  }
2506
2789
 
2507
- FrtTermInfosWriter *frt_tiw_open(FrtStore *store,
2508
- const char *segment,
2509
- int index_interval,
2510
- int skip_interval)
2511
- {
2790
+ FrtTermInfosWriter *frt_tiw_open(FrtStore *store, const char *segment, int index_interval, int skip_interval) {
2512
2791
  char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
2513
2792
  FrtTermInfosWriter *tiw = FRT_ALLOC(FrtTermInfosWriter);
2514
2793
  size_t segment_len = strlen(segment);
@@ -2537,11 +2816,7 @@ FrtTermInfosWriter *frt_tiw_open(FrtStore *store,
2537
2816
  return tiw;
2538
2817
  }
2539
2818
 
2540
- static void tw_write_term(FrtTermWriter *tw,
2541
- FrtOutStream *os,
2542
- const char *term,
2543
- int term_len)
2544
- {
2819
+ static void tw_write_term(FrtTermWriter *tw, FrtOutStream *os, const char *term, int term_len) {
2545
2820
  int start = frt_hlp_string_diff(tw->last_term, term);
2546
2821
  int length = term_len - start;
2547
2822
 
@@ -2552,12 +2827,7 @@ static void tw_write_term(FrtTermWriter *tw,
2552
2827
  tw->last_term = term;
2553
2828
  }
2554
2829
 
2555
- static void tw_add(FrtTermWriter *tw,
2556
- const char *term,
2557
- int term_len,
2558
- FrtTermInfo *ti,
2559
- int skip_interval)
2560
- {
2830
+ static void tw_add(FrtTermWriter *tw, const char *term, int term_len, FrtTermInfo *ti, int skip_interval) {
2561
2831
  FrtOutStream *os = tw->os;
2562
2832
 
2563
2833
  #ifdef DEBUG
@@ -2587,11 +2857,7 @@ static void tw_add(FrtTermWriter *tw,
2587
2857
  tw->counter++;
2588
2858
  }
2589
2859
 
2590
- void frt_tiw_add(FrtTermInfosWriter *tiw,
2591
- const char *term,
2592
- int term_len,
2593
- FrtTermInfo *ti)
2594
- {
2860
+ void frt_tiw_add(FrtTermInfosWriter *tiw, const char *term, int term_len, FrtTermInfo *ti) {
2595
2861
  off_t tis_pos;
2596
2862
 
2597
2863
  if (0 == (tiw->tis_writer->counter % tiw->index_interval)) {
@@ -2609,15 +2875,13 @@ void frt_tiw_add(FrtTermInfosWriter *tiw,
2609
2875
  tw_add(tiw->tis_writer, term, term_len, ti, tiw->skip_interval);
2610
2876
  }
2611
2877
 
2612
- static void tw_reset(FrtTermWriter *tw)
2613
- {
2878
+ static void tw_reset(FrtTermWriter *tw) {
2614
2879
  tw->counter = 0;
2615
2880
  tw->last_term = FRT_EMPTY_STRING;
2616
2881
  FRT_ZEROSET(&(tw->last_term_info), FrtTermInfo);
2617
2882
  }
2618
2883
 
2619
- void frt_tiw_start_field(FrtTermInfosWriter *tiw, int field_num)
2620
- {
2884
+ void frt_tiw_start_field(FrtTermInfosWriter *tiw, int field_num) {
2621
2885
  FrtOutStream *tfx_out = tiw->tfx_out;
2622
2886
  frt_os_write_vint(tfx_out, tiw->tix_writer->counter); /* write tix size */
2623
2887
  frt_os_write_vint(tfx_out, tiw->tis_writer->counter); /* write tis size */
@@ -2630,8 +2894,7 @@ void frt_tiw_start_field(FrtTermInfosWriter *tiw, int field_num)
2630
2894
  tiw->field_count++;
2631
2895
  }
2632
2896
 
2633
- void frt_tiw_close(FrtTermInfosWriter *tiw)
2634
- {
2897
+ void frt_tiw_close(FrtTermInfosWriter *tiw) {
2635
2898
  FrtOutStream *tfx_out = tiw->tfx_out;
2636
2899
  frt_os_write_vint(tfx_out, tiw->tix_writer->counter);
2637
2900
  frt_os_write_vint(tfx_out, tiw->tis_writer->counter);
@@ -2665,8 +2928,7 @@ void frt_tiw_close(FrtTermInfosWriter *tiw)
2665
2928
  }\
2666
2929
  } while (0)
2667
2930
 
2668
- static void stde_seek_ti(FrtSegmentTermDocEnum *stde, FrtTermInfo *ti)
2669
- {
2931
+ static void stde_seek_ti(FrtSegmentTermDocEnum *stde, FrtTermInfo *ti) {
2670
2932
  if (NULL == ti) {
2671
2933
  stde->doc_freq = 0;
2672
2934
  } else {
@@ -2684,14 +2946,12 @@ static void stde_seek_ti(FrtSegmentTermDocEnum *stde, FrtTermInfo *ti)
2684
2946
  }
2685
2947
  }
2686
2948
 
2687
- static void stde_seek(FrtTermDocEnum *tde, int field_num, const char *term)
2688
- {
2949
+ static void stde_seek(FrtTermDocEnum *tde, int field_num, const char *term) {
2689
2950
  FrtTermInfo *ti = tir_get_ti_field(STDE(tde)->tir, field_num, term);
2690
2951
  stde_seek_ti(STDE(tde), ti);
2691
2952
  }
2692
2953
 
2693
- static void stde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te)
2694
- {
2954
+ static void stde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te) {
2695
2955
  #ifdef DEBUG
2696
2956
  if (te->set_field != &ste_set_field) {
2697
2957
  FRT_RAISE(FRT_ARG_ERROR, "Passed an incorrect TermEnum type");
@@ -2700,20 +2960,17 @@ static void stde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te)
2700
2960
  stde_seek_ti(STDE(tde), &(te->curr_ti));
2701
2961
  }
2702
2962
 
2703
- static int stde_doc_num(FrtTermDocEnum *tde)
2704
- {
2963
+ static int stde_doc_num(FrtTermDocEnum *tde) {
2705
2964
  CHECK_STATE("doc_num");
2706
2965
  return STDE(tde)->doc_num;
2707
2966
  }
2708
2967
 
2709
- static int stde_freq(FrtTermDocEnum *tde)
2710
- {
2968
+ static int stde_freq(FrtTermDocEnum *tde) {
2711
2969
  CHECK_STATE("freq");
2712
2970
  return STDE(tde)->freq;
2713
2971
  }
2714
2972
 
2715
- static bool stde_next(FrtTermDocEnum *tde)
2716
- {
2973
+ static bool stde_next(FrtTermDocEnum *tde) {
2717
2974
  int doc_code;
2718
2975
  FrtSegmentTermDocEnum *stde = STDE(tde);
2719
2976
 
@@ -2741,8 +2998,7 @@ static bool stde_next(FrtTermDocEnum *tde)
2741
2998
  return true;
2742
2999
  }
2743
3000
 
2744
- static int stde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
2745
- {
3001
+ static int stde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num) {
2746
3002
  FrtSegmentTermDocEnum *stde = STDE(tde);
2747
3003
  int i = 0;
2748
3004
  int doc_code;
@@ -2769,8 +3025,7 @@ static int stde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
2769
3025
  return i;
2770
3026
  }
2771
3027
 
2772
- static bool stde_skip_to(FrtTermDocEnum *tde, int target_doc_num)
2773
- {
3028
+ static bool stde_skip_to(FrtTermDocEnum *tde, int target_doc_num) {
2774
3029
  FrtSegmentTermDocEnum *stde = STDE(tde);
2775
3030
 
2776
3031
  if (stde->doc_freq >= stde->skip_interval
@@ -2834,8 +3089,7 @@ static bool stde_skip_to(FrtTermDocEnum *tde, int target_doc_num)
2834
3089
  return true;
2835
3090
  }
2836
3091
 
2837
- static void stde_close(FrtTermDocEnum *tde)
2838
- {
3092
+ static void stde_close(FrtTermDocEnum *tde) {
2839
3093
  frt_is_close(STDE(tde)->frq_in);
2840
3094
 
2841
3095
  if (NULL != STDE(tde)->skip_in) {
@@ -2845,23 +3099,17 @@ static void stde_close(FrtTermDocEnum *tde)
2845
3099
  free(tde);
2846
3100
  }
2847
3101
 
2848
- static void stde_skip_prox(FrtSegmentTermDocEnum *stde)
2849
- {
3102
+ static void stde_skip_prox(FrtSegmentTermDocEnum *stde) {
2850
3103
  (void)stde;
2851
3104
  }
2852
3105
 
2853
- static void stde_seek_prox(FrtSegmentTermDocEnum *stde, off_t prx_ptr)
2854
- {
3106
+ static void stde_seek_prox(FrtSegmentTermDocEnum *stde, off_t prx_ptr) {
2855
3107
  (void)stde;
2856
3108
  (void)prx_ptr;
2857
3109
  }
2858
3110
 
2859
3111
 
2860
- FrtTermDocEnum *frt_stde_new(FrtTermInfosReader *tir,
2861
- FrtInStream *frq_in,
2862
- FrtBitVector *deleted_docs,
2863
- int skip_interval)
2864
- {
3112
+ FrtTermDocEnum *frt_stde_new(FrtTermInfosReader *tir, FrtInStream *frq_in, FrtBitVector *deleted_docs, int skip_interval) {
2865
3113
  FrtSegmentTermDocEnum *stde = FRT_ALLOC_AND_ZERO(FrtSegmentTermDocEnum);
2866
3114
  FrtTermDocEnum *tde = (FrtTermDocEnum *)stde;
2867
3115
 
@@ -2893,27 +3141,23 @@ FrtTermDocEnum *frt_stde_new(FrtTermInfosReader *tir,
2893
3141
  * SegmentTermPosEnum
2894
3142
  ****************************************************************************/
2895
3143
 
2896
- static void stpe_seek_ti(FrtSegmentTermDocEnum *stde, FrtTermInfo *ti)
2897
- {
3144
+ static void stpe_seek_ti(FrtSegmentTermDocEnum *stde, FrtTermInfo *ti) {
2898
3145
  if (NULL == ti) {
2899
3146
  stde->doc_freq = 0;
2900
- }
2901
- else {
3147
+ } else {
2902
3148
  stde_seek_ti(stde, ti);
2903
3149
  frt_is_seek(stde->prx_in, ti->prx_ptr);
2904
3150
  }
2905
3151
  }
2906
3152
 
2907
- static void stpe_seek(FrtTermDocEnum *tde, int field_num, const char *term)
2908
- {
3153
+ static void stpe_seek(FrtTermDocEnum *tde, int field_num, const char *term) {
2909
3154
  FrtSegmentTermDocEnum *stde = STDE(tde);
2910
3155
  FrtTermInfo *ti = tir_get_ti_field(stde->tir, field_num, term);
2911
3156
  stpe_seek_ti(stde, ti);
2912
3157
  stde->prx_cnt = 0;
2913
3158
  }
2914
3159
 
2915
- static bool stpe_next(FrtTermDocEnum *tde)
2916
- {
3160
+ static bool stpe_next(FrtTermDocEnum *tde) {
2917
3161
  FrtSegmentTermDocEnum *stde = STDE(tde);
2918
3162
  frt_is_skip_vints(stde->prx_in, stde->prx_cnt);
2919
3163
 
@@ -3387,8 +3631,8 @@ FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms, i
3387
3631
  ****************************************************************************/
3388
3632
 
3389
3633
  static FrtHash *fn_extensions = NULL;
3390
- static void file_name_filter_init()
3391
- {
3634
+
3635
+ static void file_name_filter_init(void) {
3392
3636
  int i;
3393
3637
  fn_extensions = frt_h_new_str((frt_free_ft)NULL, (frt_free_ft)NULL);
3394
3638
  for (i = 0; i < FRT_NELEMS(INDEX_EXTENSIONS); i++) {
@@ -3687,9 +3931,8 @@ static void ir_acquire_write_lock(FrtIndexReader *ir)
3687
3931
  }
3688
3932
  }
3689
3933
 
3690
- static FrtIndexReader *ir_setup(FrtIndexReader *ir, FrtStore *store, FrtSegmentInfos *sis,
3691
- FrtFieldInfos *fis, int is_owner)
3692
- {
3934
+ static FrtIndexReader *ir_setup(FrtIndexReader *ir, FrtStore *store, FrtSegmentInfos *sis, FrtFieldInfos *fis, int is_owner) {
3935
+ ir->type = FRT_INDEX_READER;
3693
3936
  frt_mutex_init(&ir->mutex, NULL);
3694
3937
  frt_mutex_init(&ir->field_index_mutex, NULL);
3695
3938
 
@@ -3712,8 +3955,7 @@ static FrtIndexReader *ir_setup(FrtIndexReader *ir, FrtStore *store, FrtSegmentI
3712
3955
  return ir;
3713
3956
  }
3714
3957
 
3715
- int frt_ir_doc_freq(FrtIndexReader *ir, FrtSymbol field, const char *term)
3716
- {
3958
+ int frt_ir_doc_freq(FrtIndexReader *ir, ID field, const char *term) {
3717
3959
  int field_num = frt_fis_get_field_num(ir->fis, field);
3718
3960
  if (field_num >= 0) {
3719
3961
  return ir->doc_freq(ir, field_num, term);
@@ -3723,8 +3965,7 @@ int frt_ir_doc_freq(FrtIndexReader *ir, FrtSymbol field, const char *term)
3723
3965
  }
3724
3966
  }
3725
3967
 
3726
- static void ir_set_norm_i(FrtIndexReader *ir, int doc_num, int field_num, frt_uchar val)
3727
- {
3968
+ static void ir_set_norm_i(FrtIndexReader *ir, int doc_num, int field_num, frt_uchar val) {
3728
3969
  frt_mutex_lock(&ir->mutex);
3729
3970
  ir->acquire_write_lock(ir);
3730
3971
  ir->set_norm_i(ir, doc_num, field_num, val);
@@ -3732,8 +3973,7 @@ static void ir_set_norm_i(FrtIndexReader *ir, int doc_num, int field_num, frt_uc
3732
3973
  frt_mutex_unlock(&ir->mutex);
3733
3974
  }
3734
3975
 
3735
- void frt_ir_set_norm(FrtIndexReader *ir, int doc_num, FrtSymbol field, frt_uchar val)
3736
- {
3976
+ void frt_ir_set_norm(FrtIndexReader *ir, int doc_num, ID field, frt_uchar val) {
3737
3977
  int field_num = frt_fis_get_field_num(ir->fis, field);
3738
3978
  if (field_num >= 0) {
3739
3979
  ir_set_norm_i(ir, doc_num, field_num, val);
@@ -3755,14 +3995,12 @@ frt_uchar *frt_ir_get_norms_i(FrtIndexReader *ir, int field_num)
3755
3995
  return norms;
3756
3996
  }
3757
3997
 
3758
- frt_uchar *frt_ir_get_norms(FrtIndexReader *ir, FrtSymbol field)
3759
- {
3998
+ frt_uchar *frt_ir_get_norms(FrtIndexReader *ir, ID field) {
3760
3999
  int field_num = frt_fis_get_field_num(ir->fis, field);
3761
4000
  return frt_ir_get_norms_i(ir, field_num);
3762
4001
  }
3763
4002
 
3764
- frt_uchar *frt_ir_get_norms_into(FrtIndexReader *ir, FrtSymbol field, frt_uchar *buf)
3765
- {
4003
+ frt_uchar *frt_ir_get_norms_into(FrtIndexReader *ir, ID field, frt_uchar *buf) {
3766
4004
  int field_num = frt_fis_get_field_num(ir->fis, field);
3767
4005
  if (field_num >= 0) {
3768
4006
  ir->get_norms_into(ir, field_num, buf);
@@ -3793,7 +4031,7 @@ void frt_ir_delete_doc(FrtIndexReader *ir, int doc_num)
3793
4031
  }
3794
4032
  }
3795
4033
 
3796
- FrtDocument *frt_ir_get_doc_with_term(FrtIndexReader *ir, FrtSymbol field, const char *term) {
4034
+ FrtDocument *frt_ir_get_doc_with_term(FrtIndexReader *ir, ID field, const char *term) {
3797
4035
  FrtTermDocEnum *tde = ir_term_docs_for(ir, field, term);
3798
4036
  FrtDocument *doc = NULL;
3799
4037
 
@@ -3806,8 +4044,7 @@ FrtDocument *frt_ir_get_doc_with_term(FrtIndexReader *ir, FrtSymbol field, const
3806
4044
  return doc;
3807
4045
  }
3808
4046
 
3809
- FrtTermEnum *frt_ir_terms(FrtIndexReader *ir, FrtSymbol field)
3810
- {
4047
+ FrtTermEnum *frt_ir_terms(FrtIndexReader *ir, ID field) {
3811
4048
  FrtTermEnum *te = NULL;
3812
4049
  int field_num = frt_fis_get_field_num(ir->fis, field);
3813
4050
  if (field_num >= 0) {
@@ -3816,9 +4053,7 @@ FrtTermEnum *frt_ir_terms(FrtIndexReader *ir, FrtSymbol field)
3816
4053
  return te;
3817
4054
  }
3818
4055
 
3819
- FrtTermEnum *frt_ir_terms_from(FrtIndexReader *ir, FrtSymbol field,
3820
- const char *term)
3821
- {
4056
+ FrtTermEnum *frt_ir_terms_from(FrtIndexReader *ir, ID field, const char *term) {
3822
4057
  FrtTermEnum *te = NULL;
3823
4058
  int field_num = frt_fis_get_field_num(ir->fis, field);
3824
4059
  if (field_num >= 0) {
@@ -3827,9 +4062,7 @@ FrtTermEnum *frt_ir_terms_from(FrtIndexReader *ir, FrtSymbol field,
3827
4062
  return te;
3828
4063
  }
3829
4064
 
3830
- FrtTermDocEnum *ir_term_docs_for(FrtIndexReader *ir, FrtSymbol field,
3831
- const char *term)
3832
- {
4065
+ FrtTermDocEnum *ir_term_docs_for(FrtIndexReader *ir, ID field, const char *term) {
3833
4066
  int field_num = frt_fis_get_field_num(ir->fis, field);
3834
4067
  FrtTermDocEnum *tde = ir->term_docs(ir);
3835
4068
  if (field_num >= 0) {
@@ -3838,9 +4071,7 @@ FrtTermDocEnum *ir_term_docs_for(FrtIndexReader *ir, FrtSymbol field,
3838
4071
  return tde;
3839
4072
  }
3840
4073
 
3841
- FrtTermDocEnum *frt_ir_term_positions_for(FrtIndexReader *ir, FrtSymbol field,
3842
- const char *term)
3843
- {
4074
+ FrtTermDocEnum *frt_ir_term_positions_for(FrtIndexReader *ir, ID field, const char *term) {
3844
4075
  int field_num = frt_fis_get_field_num(ir->fis, field);
3845
4076
  FrtTermDocEnum *tde = ir->term_positions(ir);
3846
4077
  if (field_num >= 0) {
@@ -3854,7 +4085,7 @@ static void ir_commit_i(FrtIndexReader *ir)
3854
4085
  if (ir->has_changes) {
3855
4086
  if (NULL == ir->deleter && NULL != ir->store) {
3856
4087
  /* In the MultiReader case, we share this deleter across all
3857
- * SegmentReaders: */
4088
+ * FrtSegmentReaders: */
3858
4089
  ir->set_deleter_i(ir, frt_deleter_new(ir->sis, ir->store));
3859
4090
  }
3860
4091
  if (ir->is_owner) {
@@ -3990,34 +4221,14 @@ static void norm_rewrite(Norm *norm, FrtStore *store, FrtDeleter *dlr,
3990
4221
  }
3991
4222
 
3992
4223
  /****************************************************************************
3993
- * SegmentReader
4224
+ * FrtSegmentReader
3994
4225
  ****************************************************************************/
3995
4226
 
3996
- typedef struct SegmentReader {
3997
- FrtIndexReader ir;
3998
- FrtSegmentInfo *si;
3999
- char *segment;
4000
- FrtFieldsReader *fr;
4001
- FrtBitVector *deleted_docs;
4002
- FrtInStream *frq_in;
4003
- FrtInStream *prx_in;
4004
- FrtSegmentFieldIndex *sfi;
4005
- FrtTermInfosReader *tir;
4006
- frt_thread_key_t thread_fr;
4007
- void **fr_bucket;
4008
- FrtHash *norms;
4009
- FrtStore *cfs_store;
4010
- bool deleted_docs_dirty : 1;
4011
- bool undelete_all : 1;
4012
- bool norms_dirty : 1;
4013
- } SegmentReader;
4014
-
4015
4227
  #define IR(ir) ((FrtIndexReader *)(ir))
4016
-
4017
- #define SR(ir) ((SegmentReader *)(ir))
4228
+ #define SR(ir) ((FrtSegmentReader *)(ir))
4018
4229
  #define SR_SIZE(ir) (SR(ir)->fr->size)
4019
4230
 
4020
- static FrtFieldsReader *sr_fr(SegmentReader *sr)
4231
+ static FrtFieldsReader *sr_fr(FrtSegmentReader *sr)
4021
4232
  {
4022
4233
  FrtFieldsReader *fr;
4023
4234
 
@@ -4029,12 +4240,12 @@ static FrtFieldsReader *sr_fr(SegmentReader *sr)
4029
4240
  return fr;
4030
4241
  }
4031
4242
 
4032
- static bool sr_is_deleted_i(SegmentReader *sr, int doc_num)
4243
+ static bool sr_is_deleted_i(FrtSegmentReader *sr, int doc_num)
4033
4244
  {
4034
4245
  return (NULL != sr->deleted_docs && frt_bv_get(sr->deleted_docs, doc_num));
4035
4246
  }
4036
4247
 
4037
- static void sr_get_norms_into_i(SegmentReader *sr, int field_num,
4248
+ static void sr_get_norms_into_i(FrtSegmentReader *sr, int field_num,
4038
4249
  frt_uchar *buf)
4039
4250
  {
4040
4251
  Norm *norm = (Norm *)frt_h_get_int(sr->norms, field_num);
@@ -4053,7 +4264,7 @@ static void sr_get_norms_into_i(SegmentReader *sr, int field_num,
4053
4264
  }
4054
4265
  }
4055
4266
 
4056
- static frt_uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
4267
+ static frt_uchar *sr_get_norms_i(FrtSegmentReader *sr, int field_num)
4057
4268
  {
4058
4269
  Norm *norm = (Norm *)frt_h_get_int(sr->norms, field_num);
4059
4270
  if (NULL == norm) { /* not an indexed field */
@@ -4189,7 +4400,7 @@ static void sr_commit_i(FrtIndexReader *ir)
4189
4400
 
4190
4401
  static void sr_close_i(FrtIndexReader *ir)
4191
4402
  {
4192
- SegmentReader *sr = SR(ir);
4403
+ FrtSegmentReader *sr = SR(ir);
4193
4404
 
4194
4405
  if (sr->fr) frt_fr_close(sr->fr);
4195
4406
  if (sr->tir) frt_tir_close(sr->tir);
@@ -4298,14 +4509,12 @@ static FrtTermDocEnum *sr_term_docs(FrtIndexReader *ir)
4298
4509
 
4299
4510
  static FrtTermDocEnum *sr_term_positions(FrtIndexReader *ir)
4300
4511
  {
4301
- SegmentReader *sr = SR(ir);
4512
+ FrtSegmentReader *sr = SR(ir);
4302
4513
  return frt_stpe_new(sr->tir, sr->frq_in, sr->prx_in, sr->deleted_docs,
4303
4514
  STE(sr->tir->orig_te)->skip_interval);
4304
4515
  }
4305
4516
 
4306
- static FrtTermVector *sr_term_vector(FrtIndexReader *ir, int doc_num,
4307
- FrtSymbol field)
4308
- {
4517
+ static FrtTermVector *sr_term_vector(FrtIndexReader *ir, int doc_num, ID field) {
4309
4518
  FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(ir->fis->field_dict, (void *)field);
4310
4519
  FrtFieldsReader *fr;
4311
4520
 
@@ -4360,7 +4569,7 @@ static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store)
4360
4569
  SR(ir)->norms_dirty = false;
4361
4570
  }
4362
4571
 
4363
- static FrtIndexReader *sr_setup_i(SegmentReader *sr)
4572
+ static FrtIndexReader *sr_setup_i(FrtSegmentReader *sr)
4364
4573
  {
4365
4574
  FrtStore *volatile store = sr->si->store;
4366
4575
  FrtIndexReader *ir = IR(sr);
@@ -4391,6 +4600,8 @@ static FrtIndexReader *sr_setup_i(SegmentReader *sr)
4391
4600
  ir->commit_i = &sr_commit_i;
4392
4601
  ir->close_i = &sr_close_i;
4393
4602
 
4603
+ ir->type = FRT_SEGMENT_READER;
4604
+
4394
4605
  sr->cfs_store = NULL;
4395
4606
 
4396
4607
  FRT_TRY
@@ -4430,10 +4641,13 @@ static FrtIndexReader *sr_setup_i(SegmentReader *sr)
4430
4641
  return ir;
4431
4642
  }
4432
4643
 
4433
- static FrtIndexReader *sr_open(FrtSegmentInfos *sis, FrtFieldInfos *fis, int si_num,
4434
- bool is_owner)
4435
- {
4436
- SegmentReader *sr = FRT_ALLOC_AND_ZERO(SegmentReader);
4644
+ FrtSegmentReader *frt_sr_alloc(void) {
4645
+ return FRT_ALLOC_AND_ZERO(FrtSegmentReader);
4646
+ }
4647
+
4648
+ static FrtIndexReader *sr_open(FrtSegmentInfos *sis, FrtFieldInfos *fis, int si_num, bool is_owner, FrtSegmentReader *sr) {
4649
+ if (sr == NULL)
4650
+ sr = frt_sr_alloc();
4437
4651
  sr->si = sis->segs[si_num];
4438
4652
  ir_setup(IR(sr), sr->si->store, sis, fis, is_owner);
4439
4653
  return sr_setup_i(sr);
@@ -4604,9 +4818,7 @@ static FrtTermDocEnum *mr_term_positions(FrtIndexReader *ir)
4604
4818
  return mtpe_new(MR(ir));
4605
4819
  }
4606
4820
 
4607
- static FrtTermVector *mr_term_vector(FrtIndexReader *ir, int doc_num,
4608
- FrtSymbol field)
4609
- {
4821
+ static FrtTermVector *mr_term_vector(FrtIndexReader *ir, int doc_num, ID field) {
4610
4822
  GET_READER();
4611
4823
  return reader->term_vector(reader, doc_num - MR(ir)->starts[i], field);
4612
4824
  }
@@ -4710,10 +4922,12 @@ static void mr_close_i(FrtIndexReader *ir)
4710
4922
  free(MR(ir)->starts);
4711
4923
  }
4712
4924
 
4713
- static FrtIndexReader *mr_new(FrtIndexReader **sub_readers, const int r_cnt)
4714
- {
4925
+ FrtMultiReader *frt_mr_alloc(void) {
4926
+ return FRT_ALLOC_AND_ZERO(FrtMultiReader);
4927
+ }
4928
+
4929
+ FrtMultiReader *frt_mr_init(FrtMultiReader *mr, FrtIndexReader **sub_readers, const int r_cnt) {
4715
4930
  int i;
4716
- FrtMultiReader *mr = FRT_ALLOC_AND_ZERO(FrtMultiReader);
4717
4931
  FrtIndexReader *ir = IR(mr);
4718
4932
 
4719
4933
  mr->sub_readers = sub_readers;
@@ -4760,21 +4974,19 @@ static FrtIndexReader *mr_new(FrtIndexReader **sub_readers, const int r_cnt)
4760
4974
  ir->commit_i = &mr_commit_i;
4761
4975
  ir->close_i = &mr_close_i;
4762
4976
 
4763
- return ir;
4977
+ ir->type = FRT_MULTI_READER;
4978
+
4979
+ return mr;
4764
4980
  }
4765
4981
 
4766
- static FrtIndexReader *frt_mr_open_i(FrtStore *store,
4767
- FrtSegmentInfos *sis,
4768
- FrtFieldInfos *fis,
4769
- FrtIndexReader **sub_readers,
4770
- const int r_cnt)
4771
- {
4772
- FrtIndexReader *ir = mr_new(sub_readers, r_cnt);
4982
+ static FrtIndexReader *frt_mr_open_i(FrtStore *store, FrtSegmentInfos *sis, FrtFieldInfos *fis, FrtIndexReader **sub_readers, const int r_cnt, FrtIndexReader *ir) {
4983
+ if (ir == NULL)
4984
+ ir = (FrtIndexReader *)frt_mr_alloc();
4985
+ ir = (FrtIndexReader *)frt_mr_init((FrtMultiReader *)ir, sub_readers, r_cnt);
4773
4986
  return ir_setup(ir, store, sis, fis, true);
4774
4987
  }
4775
4988
 
4776
- static void mr_close_ext_i(FrtIndexReader *ir)
4777
- {
4989
+ static void mr_close_ext_i(FrtIndexReader *ir) {
4778
4990
  int **field_num_map = MR(ir)->field_num_map;
4779
4991
  if (field_num_map) {
4780
4992
  int i;
@@ -4787,12 +4999,13 @@ static void mr_close_ext_i(FrtIndexReader *ir)
4787
4999
  mr_close_i(ir);
4788
5000
  }
4789
5001
 
4790
- FrtIndexReader *frt_mr_open(FrtIndexReader **sub_readers, const int r_cnt)
4791
- {
4792
- FrtIndexReader *ir = mr_new(sub_readers, r_cnt);
5002
+ FrtIndexReader *frt_mr_open(FrtIndexReader *ir, FrtIndexReader **sub_readers, const int r_cnt) {
5003
+ if (ir == NULL)
5004
+ ir = (FrtIndexReader *)frt_mr_alloc();
5005
+ ir = (FrtIndexReader *)frt_mr_init((FrtMultiReader *)ir, sub_readers, r_cnt);
4793
5006
  FrtMultiReader *mr = MR(ir);
4794
5007
  /* defaults don't matter, this is just for reading fields, not adding */
4795
- FrtFieldInfos *fis = frt_fis_new(FRT_STORE_NO, FRT_INDEX_NO, FRT_TERM_VECTOR_NO);
5008
+ FrtFieldInfos *fis = frt_fis_new(FRT_STORE_NO, FRT_COMPRESSION_NONE, FRT_INDEX_NO, FRT_TERM_VECTOR_NO);
4796
5009
  int i, j;
4797
5010
  bool need_field_map = false;
4798
5011
 
@@ -4827,12 +5040,10 @@ FrtIndexReader *frt_mr_open(FrtIndexReader **sub_readers, const int r_cnt)
4827
5040
  mr->field_num_map[i][j] = fi_sub ? fi_sub->number : -1;
4828
5041
  }
4829
5042
  }
4830
- }
4831
- else {
5043
+ } else {
4832
5044
  mr->field_num_map = NULL;
4833
5045
  }
4834
5046
 
4835
-
4836
5047
  ir->close_i = &mr_close_ext_i;
4837
5048
 
4838
5049
  return ir_setup(ir, NULL, NULL, fis, false);
@@ -4842,21 +5053,19 @@ FrtIndexReader *frt_mr_open(FrtIndexReader **sub_readers, const int r_cnt)
4842
5053
  * IndexReader
4843
5054
  ****************************************************************************/
4844
5055
 
4845
-
4846
- static void ir_open_i(FrtStore *store, FindSegmentsFile *fsf)
4847
- {
5056
+ static void ir_open_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir) {
4848
5057
  volatile bool success = false;
4849
- FrtIndexReader *volatile ir = NULL;
5058
+ // FrtIndexReader *volatile ir = NULL;
4850
5059
  FrtSegmentInfos *volatile sis = NULL;
4851
5060
  FRT_TRY
4852
5061
  do {
4853
5062
  FrtFieldInfos *fis;
4854
5063
  frt_mutex_lock(&store->mutex);
4855
- frt_sis_read_i(store, fsf);
5064
+ frt_sis_read_i(store, fsf, NULL);
4856
5065
  sis = fsf->ret.sis;
4857
5066
  fis = sis->fis;
4858
5067
  if (sis->size == 1) {
4859
- ir = sr_open(sis, fis, 0, true);
5068
+ ir = sr_open(sis, fis, 0, true, (FrtSegmentReader *)ir);
4860
5069
  }
4861
5070
  else {
4862
5071
  volatile int i;
@@ -4864,7 +5073,7 @@ static void ir_open_i(FrtStore *store, FindSegmentsFile *fsf)
4864
5073
  int num_segments = sis->size;
4865
5074
  for (i = num_segments - 1; i >= 0; i--) {
4866
5075
  FRT_TRY
4867
- readers[i] = sr_open(sis, fis, i, false);
5076
+ readers[i] = sr_open(sis, fis, i, false, NULL);
4868
5077
  FRT_XCATCHALL
4869
5078
  for (i++; i < num_segments; i++) {
4870
5079
  frt_ir_close(readers[i]);
@@ -4872,7 +5081,7 @@ static void ir_open_i(FrtStore *store, FindSegmentsFile *fsf)
4872
5081
  free(readers);
4873
5082
  FRT_XENDTRY
4874
5083
  }
4875
- ir = frt_mr_open_i(store, sis, fis, readers, sis->size);
5084
+ ir = frt_mr_open_i(store, sis, fis, readers, sis->size, ir);
4876
5085
  }
4877
5086
  fsf->ret.ir = ir;
4878
5087
  success = true;
@@ -4881,8 +5090,7 @@ static void ir_open_i(FrtStore *store, FindSegmentsFile *fsf)
4881
5090
  if (!success) {
4882
5091
  if (ir) {
4883
5092
  frt_ir_close(ir);
4884
- }
4885
- else if (sis) {
5093
+ } else if (sis) {
4886
5094
  frt_sis_destroy(sis);
4887
5095
  }
4888
5096
  }
@@ -4894,15 +5102,12 @@ static void ir_open_i(FrtStore *store, FindSegmentsFile *fsf)
4894
5102
  * Will keep a reference to the store. To let this method delete the store
4895
5103
  * make sure you deref the store that you pass to it
4896
5104
  */
4897
- FrtIndexReader *frt_ir_open(FrtStore *store)
4898
- {
5105
+ FrtIndexReader *frt_ir_open(FrtIndexReader *ir, FrtStore *store) {
4899
5106
  FindSegmentsFile fsf;
4900
- sis_find_segments_file(store, &fsf, &ir_open_i);
5107
+ sis_find_segments_file(store, &fsf, &ir_open_i, ir);
4901
5108
  return fsf.ret.ir;
4902
5109
  }
4903
5110
 
4904
-
4905
-
4906
5111
  /****************************************************************************
4907
5112
  *
4908
5113
  * Occurence
@@ -5292,10 +5497,7 @@ static void dw_add_offsets(FrtDocWriter *dw, int pos, off_t start, off_t end)
5292
5497
  dw->offsets_size = pos + 1;
5293
5498
  }
5294
5499
 
5295
- FrtHash *frt_dw_invert_field(FrtDocWriter *dw,
5296
- FrtFieldInverter *fld_inv,
5297
- FrtDocField *df)
5298
- {
5500
+ FrtHash *frt_dw_invert_field(FrtDocWriter *dw, FrtFieldInverter *fld_inv, FrtDocField *df) {
5299
5501
  FrtMemoryPool *mp = dw->mp;
5300
5502
  FrtAnalyzer *a = dw->analyzer;
5301
5503
  FrtHash *curr_plists = dw->curr_plists;
@@ -5311,7 +5513,7 @@ FrtHash *frt_dw_invert_field(FrtDocWriter *dw,
5311
5513
  int pos = -1, num_terms = 0;
5312
5514
 
5313
5515
  for (i = 0; i < df_size; i++) {
5314
- FrtTokenStream *ts = frt_a_get_ts(a, df->name, df->data[i]);
5516
+ FrtTokenStream *ts = frt_a_get_ts(a, df->name, df->data[i], df->encodings[i]);
5315
5517
  /* ts->reset(ts, df->data[i]); no longer being called */
5316
5518
  if (store_offsets) {
5317
5519
  while (NULL != (tk = ts->next(ts))) {
@@ -5321,21 +5523,16 @@ FrtHash *frt_dw_invert_field(FrtDocWriter *dw,
5321
5523
  if (pos < 0) {
5322
5524
  pos = 0;
5323
5525
  }
5324
- dw_add_posting(mp, curr_plists, fld_plists, doc_num,
5325
- tk->text, tk->len, pos);
5326
- dw_add_offsets(dw, pos,
5327
- start_offset + tk->start,
5328
- start_offset + tk->end);
5526
+ dw_add_posting(mp, curr_plists, fld_plists, doc_num, tk->text, tk->len, pos);
5527
+ dw_add_offsets(dw, pos, start_offset + tk->start, start_offset + tk->end);
5329
5528
  if (num_terms++ >= dw->max_field_length) {
5330
5529
  break;
5331
5530
  }
5332
5531
  }
5333
- }
5334
- else {
5532
+ } else {
5335
5533
  while (NULL != (tk = ts->next(ts))) {
5336
5534
  pos += tk->pos_inc;
5337
- dw_add_posting(mp, curr_plists, fld_plists, doc_num,
5338
- tk->text, tk->len, pos);
5535
+ dw_add_posting(mp, curr_plists, fld_plists, doc_num, tk->text, tk->len, pos);
5339
5536
  if (num_terms++ >= dw->max_field_length) {
5340
5537
  break;
5341
5538
  }
@@ -5345,8 +5542,7 @@ FrtHash *frt_dw_invert_field(FrtDocWriter *dw,
5345
5542
  start_offset += df->lengths[i] + 1;
5346
5543
  }
5347
5544
  fld_inv->length = num_terms;
5348
- }
5349
- else {
5545
+ } else {
5350
5546
  char buf[FRT_MAX_WORD_SIZE];
5351
5547
  buf[FRT_MAX_WORD_SIZE - 1] = '\0';
5352
5548
  for (i = 0; i < df_size; i++) {
@@ -5356,11 +5552,9 @@ FrtHash *frt_dw_invert_field(FrtDocWriter *dw,
5356
5552
  len = FRT_MAX_WORD_SIZE - 1;
5357
5553
  data_ptr = (char *)memcpy(buf, df->data[i], len);
5358
5554
  }
5359
- dw_add_posting(mp, curr_plists, fld_plists, doc_num, data_ptr,
5360
- len, i);
5555
+ dw_add_posting(mp, curr_plists, fld_plists, doc_num, data_ptr, len, i);
5361
5556
  if (store_offsets) {
5362
- dw_add_offsets(dw, i, start_offset,
5363
- start_offset + df->lengths[i]);
5557
+ dw_add_offsets(dw, i, start_offset, start_offset + df->lengths[i]);
5364
5558
  }
5365
5559
  start_offset += df->lengths[i] + 1;
5366
5560
  }
@@ -5369,14 +5563,12 @@ FrtHash *frt_dw_invert_field(FrtDocWriter *dw,
5369
5563
  return curr_plists;
5370
5564
  }
5371
5565
 
5372
- void frt_dw_reset_postings(FrtHash *postings)
5373
- {
5566
+ void frt_dw_reset_postings(FrtHash *postings) {
5374
5567
  FRT_ZEROSET_N(postings->table, FrtHashEntry, postings->mask + 1);
5375
5568
  postings->fill = postings->size = 0;
5376
5569
  }
5377
5570
 
5378
- void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc)
5379
- {
5571
+ void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc) {
5380
5572
  int i;
5381
5573
  float boost;
5382
5574
  FrtDocField *df;
@@ -5398,16 +5590,12 @@ void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc)
5398
5590
 
5399
5591
  postings = frt_dw_invert_field(dw, fld_inv, df);
5400
5592
  if (fld_inv->store_term_vector) {
5401
- frt_fw_add_postings(dw->fw, fld_inv->fi->number,
5402
- dw_sort_postings(postings), postings->size,
5403
- dw->offsets, dw->offsets_size);
5593
+ frt_fw_add_postings(dw->fw, fld_inv->fi->number, dw_sort_postings(postings), postings->size, dw->offsets, dw->offsets_size);
5404
5594
  }
5405
5595
 
5406
5596
  if (fld_inv->has_norms) {
5407
- boost = fld_inv->fi->boost * doc->boost * df->boost *
5408
- frt_sim_length_norm(dw->similarity, fi->name, fld_inv->length);
5409
- fld_inv->norms[dw->doc_num] =
5410
- frt_sim_encode_norm(dw->similarity, boost);
5597
+ boost = fld_inv->fi->boost * doc->boost * df->boost * frt_sim_length_norm(dw->similarity, fi->name, fld_inv->length);
5598
+ fld_inv->norms[dw->doc_num] = frt_sim_encode_norm(dw->similarity, boost);
5411
5599
  }
5412
5600
  frt_dw_reset_postings(postings);
5413
5601
  if (dw->offsets_size > 0) {
@@ -5960,15 +6148,12 @@ static void iw_commit_compound_file(FrtIndexWriter *iw, FrtSegmentInfo *si)
5960
6148
  iw_create_compound_file(iw->store, iw->fis, si, cfs_name, iw->deleter);
5961
6149
  }
5962
6150
 
5963
- static void iw_merge_segments(FrtIndexWriter *iw, const int min_seg,
5964
- const int max_seg)
5965
- {
6151
+ static void iw_merge_segments(FrtIndexWriter *iw, const int min_seg, const int max_seg) {
5966
6152
  int i;
5967
6153
  FrtSegmentInfos *sis = iw->sis;
5968
6154
  FrtSegmentInfo *si = frt_sis_new_segment(sis, 0, iw->store);
5969
6155
 
5970
- SegmentMerger *merger = sm_create(iw, si, &sis->segs[min_seg],
5971
- max_seg - min_seg);
6156
+ SegmentMerger *merger = sm_create(iw, si, &sis->segs[min_seg], max_seg - min_seg);
5972
6157
 
5973
6158
  /* This is where all the action happens. */
5974
6159
  si->doc_cnt = sm_merge(merger);
@@ -6080,8 +6265,7 @@ void frt_iw_commit(FrtIndexWriter *iw)
6080
6265
  frt_mutex_unlock(&iw->mutex);
6081
6266
  }
6082
6267
 
6083
- void frt_iw_delete_term(FrtIndexWriter *iw, FrtSymbol field, const char *term)
6084
- {
6268
+ void frt_iw_delete_term(FrtIndexWriter *iw, ID field, const char *term) {
6085
6269
  int field_num = frt_fis_get_field_num(iw->fis, field);
6086
6270
  if (field_num >= 0) {
6087
6271
  int i;
@@ -6092,7 +6276,7 @@ void frt_iw_delete_term(FrtIndexWriter *iw, FrtSymbol field, const char *term)
6092
6276
  const int seg_cnt = sis->size;
6093
6277
  bool did_delete = false;
6094
6278
  for (i = 0; i < seg_cnt; i++) {
6095
- FrtIndexReader *ir = sr_open(sis, iw->fis, i, false);
6279
+ FrtIndexReader *ir = sr_open(sis, iw->fis, i, false, NULL);
6096
6280
  FrtTermDocEnum *tde = ir->term_docs(ir);
6097
6281
  ir->deleter = iw->deleter;
6098
6282
  stde_seek(tde, field_num, term);
@@ -6114,9 +6298,7 @@ void frt_iw_delete_term(FrtIndexWriter *iw, FrtSymbol field, const char *term)
6114
6298
  }
6115
6299
  }
6116
6300
 
6117
- void frt_iw_delete_terms(FrtIndexWriter *iw, FrtSymbol field,
6118
- char **terms, const int term_cnt)
6119
- {
6301
+ void frt_iw_delete_terms(FrtIndexWriter *iw, ID field, char **terms, const int term_cnt) {
6120
6302
  int field_num = frt_fis_get_field_num(iw->fis, field);
6121
6303
  if (field_num >= 0) {
6122
6304
  int i;
@@ -6127,7 +6309,7 @@ void frt_iw_delete_terms(FrtIndexWriter *iw, FrtSymbol field,
6127
6309
  const int seg_cnt = sis->size;
6128
6310
  bool did_delete = false;
6129
6311
  for (i = 0; i < seg_cnt; i++) {
6130
- FrtIndexReader *ir = sr_open(sis, iw->fis, i, false);
6312
+ FrtIndexReader *ir = sr_open(sis, iw->fis, i, false, NULL);
6131
6313
  FrtTermDocEnum *tde = ir->term_docs(ir);
6132
6314
  int j;
6133
6315
  for (j = 0 ; j < term_cnt; j++) {
@@ -6196,10 +6378,13 @@ void frt_iw_close(FrtIndexWriter *iw)
6196
6378
  free(iw);
6197
6379
  }
6198
6380
 
6199
- FrtIndexWriter *frt_iw_open(FrtStore *store, FrtAnalyzer *volatile analyzer,
6200
- const FrtConfig *config)
6201
- {
6202
- FrtIndexWriter *iw = FRT_ALLOC_AND_ZERO(FrtIndexWriter);
6381
+ FrtIndexWriter *frt_iw_alloc(void) {
6382
+ return FRT_ALLOC_AND_ZERO(FrtIndexWriter);
6383
+ }
6384
+
6385
+ FrtIndexWriter *frt_iw_open(FrtIndexWriter *iw, FrtStore *store, FrtAnalyzer *volatile analyzer, const FrtConfig *config) {
6386
+ if (iw == NULL)
6387
+ iw = frt_iw_alloc();
6203
6388
  frt_mutex_init(&iw->mutex, NULL);
6204
6389
  iw->store = store;
6205
6390
  if (!config) {
@@ -6230,7 +6415,7 @@ FrtIndexWriter *frt_iw_open(FrtStore *store, FrtAnalyzer *volatile analyzer,
6230
6415
 
6231
6416
  iw->similarity = frt_sim_create_default();
6232
6417
  iw->analyzer = analyzer ? (FrtAnalyzer *)analyzer
6233
- : frt_mb_standard_analyzer_new(true);
6418
+ : frt_standard_analyzer_new(true);
6234
6419
 
6235
6420
  iw->deleter = frt_deleter_new(iw->sis, store);
6236
6421
  deleter_delete_deletable_files(iw->deleter);
@@ -6242,9 +6427,7 @@ FrtIndexWriter *frt_iw_open(FrtStore *store, FrtAnalyzer *volatile analyzer,
6242
6427
  /*******************/
6243
6428
  /*** Add Indexes ***/
6244
6429
  /*******************/
6245
- static void iw_cp_fields(FrtIndexWriter *iw, SegmentReader *sr,
6246
- const char *segment, int *map)
6247
- {
6430
+ static void iw_cp_fields(FrtIndexWriter *iw, FrtSegmentReader *sr, const char *segment, int *map) {
6248
6431
  char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
6249
6432
  FrtOutStream *fdt_out, *fdx_out;
6250
6433
  FrtInStream *fdt_in, *fdx_in;
@@ -6271,7 +6454,6 @@ static void iw_cp_fields(FrtIndexWriter *iw, SegmentReader *sr,
6271
6454
  frt_is2os_copy_bytes(del_in, del_out, frt_is_length(del_in));
6272
6455
  }
6273
6456
 
6274
-
6275
6457
  if (map) {
6276
6458
  int i;
6277
6459
  const int max_doc = sr_max_doc(IR(sr));
@@ -6292,10 +6474,14 @@ static void iw_cp_fields(FrtIndexWriter *iw, SegmentReader *sr,
6292
6474
  frt_os_write_vint(fdt_out, df_size);
6293
6475
  /* sum total lengths of FrtDocField */
6294
6476
  for (k = 0; k < df_size; k++) {
6295
- /* Each field has one ' ' byte so add 1 */
6296
- const int flen = frt_is_read_vint(fdt_in);
6477
+ const int flen = frt_is_read_vint(fdt_in); /* length */
6478
+ const int fenc = frt_is_read_vint(fdt_in); /* encoding */
6479
+ const int fcmp = frt_is_read_vint(fdt_in); /* compression */
6297
6480
  frt_os_write_vint(fdt_out, flen);
6298
- data_len += flen + 1;
6481
+ frt_os_write_vint(fdt_out, fenc);
6482
+ frt_os_write_vint(fdt_out, fcmp);
6483
+ /* Each field has one ' ' byte so add 1 */
6484
+ data_len += flen + 1;
6299
6485
  }
6300
6486
  }
6301
6487
  frt_is2os_copy_bytes(fdt_in, fdt_out, data_len);
@@ -6318,8 +6504,7 @@ static void iw_cp_fields(FrtIndexWriter *iw, SegmentReader *sr,
6318
6504
  frt_os_write_vint(fdt_out, tv_size);
6319
6505
  }
6320
6506
  }
6321
- }
6322
- else {
6507
+ } else {
6323
6508
  frt_is2os_copy_bytes(fdt_in, fdt_out, frt_is_length(fdt_in));
6324
6509
  frt_is2os_copy_bytes(fdx_in, fdx_out, frt_is_length(fdx_in));
6325
6510
  }
@@ -6329,7 +6514,7 @@ static void iw_cp_fields(FrtIndexWriter *iw, SegmentReader *sr,
6329
6514
  frt_os_close(fdx_out);
6330
6515
  }
6331
6516
 
6332
- static void iw_cp_terms(FrtIndexWriter *iw, SegmentReader *sr,
6517
+ static void iw_cp_terms(FrtIndexWriter *iw, FrtSegmentReader *sr,
6333
6518
  const char *segment, int *map)
6334
6519
  {
6335
6520
  char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
@@ -6398,7 +6583,7 @@ static void iw_cp_terms(FrtIndexWriter *iw, SegmentReader *sr,
6398
6583
  frt_os_close(prx_out);
6399
6584
  }
6400
6585
 
6401
- static void iw_cp_norms(FrtIndexWriter *iw, SegmentReader *sr,
6586
+ static void iw_cp_norms(FrtIndexWriter *iw, FrtSegmentReader *sr,
6402
6587
  FrtSegmentInfo *si, int *map)
6403
6588
  {
6404
6589
  int i;
@@ -6429,9 +6614,7 @@ static void iw_cp_norms(FrtIndexWriter *iw, SegmentReader *sr,
6429
6614
  }
6430
6615
  }
6431
6616
 
6432
- static void iw_cp_map_files(FrtIndexWriter *iw, SegmentReader *sr,
6433
- FrtSegmentInfo *si)
6434
- {
6617
+ static void iw_cp_map_files(FrtIndexWriter *iw, FrtSegmentReader *sr, FrtSegmentInfo *si) {
6435
6618
  int i;
6436
6619
  FrtFieldInfos *from_fis = IR(sr)->fis;
6437
6620
  FrtFieldInfos *to_fis = iw->fis;
@@ -6449,15 +6632,13 @@ static void iw_cp_map_files(FrtIndexWriter *iw, SegmentReader *sr,
6449
6632
  free(field_map);
6450
6633
  }
6451
6634
 
6452
- static void iw_cp_files(FrtIndexWriter *iw, SegmentReader *sr,
6453
- FrtSegmentInfo *si)
6454
- {
6635
+ static void iw_cp_files(FrtIndexWriter *iw, FrtSegmentReader *sr, FrtSegmentInfo *si) {
6455
6636
  iw_cp_fields(iw, sr, si->name, NULL);
6456
6637
  iw_cp_terms( iw, sr, si->name, NULL);
6457
6638
  iw_cp_norms( iw, sr, si, NULL);
6458
6639
  }
6459
6640
 
6460
- static void iw_add_segment(FrtIndexWriter *iw, SegmentReader *sr)
6641
+ static void iw_add_segment(FrtIndexWriter *iw, FrtSegmentReader *sr)
6461
6642
  {
6462
6643
  FrtSegmentInfo *si = frt_sis_new_segment(iw->sis, 0, iw->store);
6463
6644
  FrtFieldInfos *fis = iw->fis;
@@ -6472,7 +6653,7 @@ static void iw_add_segment(FrtIndexWriter *iw, SegmentReader *sr)
6472
6653
  FrtFieldInfo *fi = sub_fis->fields[j];
6473
6654
  FrtFieldInfo *new_fi = frt_fis_get_field(fis, fi->name);
6474
6655
  if (NULL == new_fi) {
6475
- new_fi = frt_fi_new(fi->name, FRT_STORE_NO, FRT_INDEX_NO, FRT_TERM_VECTOR_NO);
6656
+ new_fi = frt_fi_new(fi->name, FRT_STORE_NO, FRT_COMPRESSION_NONE, FRT_INDEX_NO, FRT_TERM_VECTOR_NO);
6476
6657
  new_fi->bits = fi->bits;
6477
6658
  frt_fis_add_field(fis, new_fi);
6478
6659
  }