isomorfeus-ferret 0.17.1 → 0.17.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (130) hide show
  1. checksums.yaml +4 -4
  2. data/ext/isomorfeus_ferret_ext/benchmark.c +9 -20
  3. data/ext/isomorfeus_ferret_ext/benchmarks_all.h +1 -2
  4. data/ext/isomorfeus_ferret_ext/bm_hash.c +1 -2
  5. data/ext/isomorfeus_ferret_ext/bm_store.c +2 -0
  6. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +4 -2
  7. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +3 -2
  8. data/ext/isomorfeus_ferret_ext/frb_analysis.c +4 -5
  9. data/ext/isomorfeus_ferret_ext/frb_field_info.c +3 -4
  10. data/ext/isomorfeus_ferret_ext/frb_index.c +118 -160
  11. data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +14 -16
  12. data/ext/isomorfeus_ferret_ext/frb_search.c +31 -23
  13. data/ext/isomorfeus_ferret_ext/frb_store.c +27 -13
  14. data/ext/isomorfeus_ferret_ext/frb_utils.c +3 -6
  15. data/ext/isomorfeus_ferret_ext/frt_analysis.c +39 -46
  16. data/ext/isomorfeus_ferret_ext/frt_analysis.h +9 -9
  17. data/ext/isomorfeus_ferret_ext/frt_array.c +11 -22
  18. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +3 -6
  19. data/ext/isomorfeus_ferret_ext/frt_doc_field.c +87 -0
  20. data/ext/isomorfeus_ferret_ext/frt_doc_field.h +26 -0
  21. data/ext/isomorfeus_ferret_ext/frt_document.c +4 -97
  22. data/ext/isomorfeus_ferret_ext/frt_document.h +2 -27
  23. data/ext/isomorfeus_ferret_ext/frt_except.c +50 -6
  24. data/ext/isomorfeus_ferret_ext/frt_except.h +3 -2
  25. data/ext/isomorfeus_ferret_ext/frt_field_index.c +13 -32
  26. data/ext/isomorfeus_ferret_ext/frt_field_index.h +0 -6
  27. data/ext/isomorfeus_ferret_ext/frt_field_info.c +69 -0
  28. data/ext/isomorfeus_ferret_ext/frt_field_info.h +49 -0
  29. data/ext/isomorfeus_ferret_ext/frt_field_infos.c +196 -0
  30. data/ext/isomorfeus_ferret_ext/frt_field_infos.h +35 -0
  31. data/ext/isomorfeus_ferret_ext/frt_global.c +10 -4
  32. data/ext/isomorfeus_ferret_ext/frt_global.h +11 -15
  33. data/ext/isomorfeus_ferret_ext/frt_hash.c +8 -8
  34. data/ext/isomorfeus_ferret_ext/frt_hash.h +1 -2
  35. data/ext/isomorfeus_ferret_ext/frt_hashset.c +20 -40
  36. data/ext/isomorfeus_ferret_ext/frt_hashset.h +1 -2
  37. data/ext/isomorfeus_ferret_ext/frt_helper.c +7 -15
  38. data/ext/isomorfeus_ferret_ext/frt_in_stream.c +482 -0
  39. data/ext/isomorfeus_ferret_ext/frt_in_stream.h +241 -0
  40. data/ext/isomorfeus_ferret_ext/frt_ind.c +20 -49
  41. data/ext/isomorfeus_ferret_ext/frt_ind.h +0 -1
  42. data/ext/isomorfeus_ferret_ext/frt_index.c +296 -1857
  43. data/ext/isomorfeus_ferret_ext/frt_index.h +2 -145
  44. data/ext/isomorfeus_ferret_ext/frt_lang.c +5 -10
  45. data/ext/isomorfeus_ferret_ext/frt_lazy_doc.c +29 -0
  46. data/ext/isomorfeus_ferret_ext/frt_lazy_doc.h +19 -0
  47. data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.c +93 -0
  48. data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.h +33 -0
  49. data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +102 -70
  50. data/ext/isomorfeus_ferret_ext/frt_mempool.c +8 -16
  51. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +23 -46
  52. data/ext/isomorfeus_ferret_ext/frt_multimapper.h +4 -8
  53. data/ext/isomorfeus_ferret_ext/frt_out_stream.c +334 -0
  54. data/ext/isomorfeus_ferret_ext/frt_out_stream.h +198 -0
  55. data/ext/isomorfeus_ferret_ext/frt_posh.c +6 -819
  56. data/ext/isomorfeus_ferret_ext/frt_posh.h +0 -57
  57. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +11 -22
  58. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +1 -2
  59. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +85 -171
  60. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +8 -16
  61. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +1 -2
  62. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +49 -98
  63. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +52 -104
  64. data/ext/isomorfeus_ferret_ext/frt_q_range.c +6 -12
  65. data/ext/isomorfeus_ferret_ext/frt_q_span.c +113 -226
  66. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -2
  67. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +134 -85
  68. data/ext/isomorfeus_ferret_ext/frt_ram_store.h +12 -0
  69. data/ext/isomorfeus_ferret_ext/frt_search.c +82 -164
  70. data/ext/isomorfeus_ferret_ext/frt_similarity.c +11 -22
  71. data/ext/isomorfeus_ferret_ext/frt_similarity.h +1 -2
  72. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -536
  73. data/ext/isomorfeus_ferret_ext/frt_store.h +90 -495
  74. data/ext/isomorfeus_ferret_ext/frt_stream.h +18 -0
  75. data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +8 -16
  76. data/ext/isomorfeus_ferret_ext/frt_win32.h +5 -10
  77. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +12 -11
  78. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +11 -13
  79. data/ext/isomorfeus_ferret_ext/lz4.c +422 -195
  80. data/ext/isomorfeus_ferret_ext/lz4.h +114 -46
  81. data/ext/isomorfeus_ferret_ext/lz4frame.c +421 -242
  82. data/ext/isomorfeus_ferret_ext/lz4frame.h +122 -53
  83. data/ext/isomorfeus_ferret_ext/lz4hc.c +127 -111
  84. data/ext/isomorfeus_ferret_ext/lz4hc.h +14 -14
  85. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +1 -1
  86. data/ext/isomorfeus_ferret_ext/mdbx.c +3762 -2526
  87. data/ext/isomorfeus_ferret_ext/mdbx.h +115 -70
  88. data/ext/isomorfeus_ferret_ext/test.c +40 -87
  89. data/ext/isomorfeus_ferret_ext/test.h +3 -6
  90. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -13
  91. data/ext/isomorfeus_ferret_ext/test_analysis.c +32 -64
  92. data/ext/isomorfeus_ferret_ext/test_array.c +6 -12
  93. data/ext/isomorfeus_ferret_ext/test_bitvector.c +12 -24
  94. data/ext/isomorfeus_ferret_ext/test_document.c +23 -33
  95. data/ext/isomorfeus_ferret_ext/test_except.c +10 -21
  96. data/ext/isomorfeus_ferret_ext/test_fields.c +62 -68
  97. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +15 -24
  98. data/ext/isomorfeus_ferret_ext/test_filter.c +17 -27
  99. data/ext/isomorfeus_ferret_ext/test_global.c +14 -29
  100. data/ext/isomorfeus_ferret_ext/test_hash.c +19 -38
  101. data/ext/isomorfeus_ferret_ext/test_hashset.c +8 -16
  102. data/ext/isomorfeus_ferret_ext/test_helper.c +4 -8
  103. data/ext/isomorfeus_ferret_ext/test_highlighter.c +16 -28
  104. data/ext/isomorfeus_ferret_ext/test_index.c +277 -495
  105. data/ext/isomorfeus_ferret_ext/test_lang.c +7 -14
  106. data/ext/isomorfeus_ferret_ext/test_mdbx_store.c +2 -5
  107. data/ext/isomorfeus_ferret_ext/test_mempool.c +5 -10
  108. data/ext/isomorfeus_ferret_ext/test_multimapper.c +3 -6
  109. data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +9 -18
  110. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +4 -6
  111. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +3 -4
  112. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +9 -15
  113. data/ext/isomorfeus_ferret_ext/test_q_parser.c +8 -16
  114. data/ext/isomorfeus_ferret_ext/test_q_span.c +19 -35
  115. data/ext/isomorfeus_ferret_ext/test_ram_store.c +14 -13
  116. data/ext/isomorfeus_ferret_ext/test_search.c +60 -109
  117. data/ext/isomorfeus_ferret_ext/test_segments.c +8 -13
  118. data/ext/isomorfeus_ferret_ext/test_similarity.c +2 -4
  119. data/ext/isomorfeus_ferret_ext/test_sort.c +14 -24
  120. data/ext/isomorfeus_ferret_ext/test_store.c +96 -115
  121. data/ext/isomorfeus_ferret_ext/test_term.c +9 -15
  122. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -14
  123. data/ext/isomorfeus_ferret_ext/test_test.c +4 -8
  124. data/ext/isomorfeus_ferret_ext/test_threading.c +15 -30
  125. data/ext/isomorfeus_ferret_ext/testhelper.c +11 -21
  126. data/ext/isomorfeus_ferret_ext/testhelper.h +1 -1
  127. data/ext/isomorfeus_ferret_ext/tests_all.h +1 -2
  128. data/lib/isomorfeus/ferret/index/index.rb +1 -12
  129. data/lib/isomorfeus/ferret/version.rb +1 -1
  130. metadata +43 -4
@@ -1,4 +1,6 @@
1
1
  #include "frt_global.h"
2
+ #include "frt_lazy_doc_field.h"
3
+ #include "frt_lazy_doc.h"
2
4
  #include "frt_index.h"
3
5
  #include "frt_similarity.h"
4
6
  #include "frt_helper.h"
@@ -6,13 +8,6 @@
6
8
  #include <string.h>
7
9
  #include <limits.h>
8
10
  #include <ctype.h>
9
- #include "brotli_decode.h"
10
- #include "brotli_encode.h"
11
- #include "bzlib.h"
12
- #include "lz4frame.h"
13
-
14
- // #undef close
15
- // #undef read
16
11
 
17
12
  extern rb_encoding *utf8_encoding;
18
13
  extern void frt_micro_sleep(const int micro_seconds);
@@ -46,19 +41,15 @@ static char *ste_next(FrtTermEnum *te);
46
41
  #define FORMAT 15
47
42
  #define SEGMENTS_GEN_FILE_NAME "segments"
48
43
  #define MAX_EXT_LEN 10
49
- #define FRT_COMPRESSION_BUFFER_SIZE 16348
50
- #define FRT_BROTLI_COMPRESSION_LEVEL 4
51
- #define FRT_BZIP_COMPRESSION_LEVEL 9
52
44
 
53
45
  /* *** Must be three characters *** */
54
46
  static const char *INDEX_EXTENSIONS[] = {
55
- "frq", "prx", "fdx", "fdt", "tfx", "tix", "tis", "del", "gen", "cfs"
47
+ "frq", "prx", "fdx", "fdt", "tfx", "tix", "tis", "del", "gen"
56
48
  };
57
49
 
58
50
  static const char BASE36_DIGITMAP[] = "0123456789abcdefghijklmnopqrstuvwxyz";
59
51
 
60
- static char *u64_to_str36(char *buf, int buf_size, frt_u64 u)
61
- {
52
+ static char *u64_to_str36(char *buf, int buf_size, frt_u64 u) {
62
53
  int i = buf_size - 1;
63
54
  buf[i] = '\0';
64
55
  for (i--; i >= 0; i--) {
@@ -75,17 +66,14 @@ static char *u64_to_str36(char *buf, int buf_size, frt_u64 u)
75
66
  return buf + i;
76
67
  }
77
68
 
78
- static frt_u64 str36_to_u64(char *p)
79
- {
69
+ static frt_u64 str36_to_u64(char *p) {
80
70
  frt_u64 u = 0;
81
71
  while (true) {
82
72
  if ('0' <= *p && '9' >= *p) {
83
73
  u = u * 36 + *p - '0';
84
- }
85
- else if ('a' <= *p && 'z' >= *p) {
74
+ } else if ('a' <= *p && 'z' >= *p) {
86
75
  u = u * 36 + *p - 'a' + 10;
87
- }
88
- else {
76
+ } else {
89
77
  break;
90
78
  }
91
79
  p++;
@@ -142,12 +130,10 @@ static char *fn_for_gen_field(char *buf,
142
130
  const char *base,
143
131
  const char *ext,
144
132
  frt_i64 gen,
145
- int field_num)
146
- {
133
+ int field_num) {
147
134
  if (-1 == gen) {
148
135
  return NULL;
149
- }
150
- else {
136
+ } else {
151
137
  char b[FRT_SEGMENT_NAME_MAX_LENGTH];
152
138
  sprintf(buf, "%s_%s.%s%d",
153
139
  base,
@@ -164,18 +150,15 @@ static char *fn_for_gen_field(char *buf,
164
150
  *
165
151
  ***************************************************************************/
166
152
 
167
- static unsigned long co_hash(const void *key)
168
- {
169
- return (unsigned long)key;
153
+ static unsigned long co_hash(const void *key) {
154
+ return (unsigned long)(uintptr_t)key;
170
155
  }
171
156
 
172
- static int co_eq(const void *key1, const void *key2)
173
- {
157
+ static int co_eq(const void *key1, const void *key2) {
174
158
  return (key1 == key2);
175
159
  }
176
160
 
177
- static void co_destroy(FrtCacheObject *self)
178
- {
161
+ static void co_destroy(FrtCacheObject *self) {
179
162
  frt_h_rem(self->ref_tab1, self->ref2, false);
180
163
  frt_h_rem(self->ref_tab2, self->ref1, false);
181
164
  self->destroy(self->obj);
@@ -183,8 +166,7 @@ static void co_destroy(FrtCacheObject *self)
183
166
  }
184
167
 
185
168
  FrtCacheObject *frt_co_create(FrtHash *ref_tab1, FrtHash *ref_tab2,
186
- void *ref1, void *ref2, frt_free_ft destroy, void *obj)
187
- {
169
+ void *ref1, void *ref2, frt_free_ft destroy, void *obj) {
188
170
  FrtCacheObject *self = FRT_ALLOC(FrtCacheObject);
189
171
  frt_h_set(ref_tab1, ref2, self);
190
172
  frt_h_set(ref_tab2, ref1, self);
@@ -201,302 +183,13 @@ FrtHash *frt_co_hash_create(void) {
201
183
  return frt_h_new(&co_hash, &co_eq, (frt_free_ft)NULL, (frt_free_ft)&co_destroy);
202
184
  }
203
185
 
204
- /****************************************************************************
205
- *
206
- * FieldInfo
207
- *
208
- ****************************************************************************/
209
-
210
- static void fi_check_params(unsigned int bits) {
211
- if (!bits_is_indexed(bits) && bits_store_term_vector(bits)) {
212
- FRT_RAISE(FRT_ARG_ERROR, "You can't store the term vectors of an unindexed field.");
213
- }
214
- if (bits_is_compressed(bits) && !bits_is_stored(bits)) {
215
- FRT_RAISE(FRT_ARG_ERROR, "Field must be stored for compression to be useful.");
216
- }
217
- }
218
-
219
- FrtFieldInfo *frt_fi_alloc(void) {
220
- return FRT_ALLOC(FrtFieldInfo);
221
- }
222
-
223
- FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, unsigned int bits) {
224
- assert(NULL != name);
225
- fi_check_params(bits);
226
- fi->name = name;
227
- fi->boost = 1.0f;
228
- fi->bits = bits;
229
- fi->number = 0;
230
- fi->ref_cnt = 1;
231
- fi->rfi = Qnil;
232
- return fi;
233
- }
234
-
235
- FrtFieldInfo *frt_fi_new(ID name, unsigned int bits) {
236
- FrtFieldInfo *fi = frt_fi_alloc();
237
- return frt_fi_init(fi, name, bits);
238
- }
239
-
240
- void frt_fi_deref(FrtFieldInfo *fi) {
241
- if (FRT_DEREF(fi) == 0) free(fi);
242
- }
243
-
244
- FrtCompressionType frt_fi_get_compression(FrtFieldInfo *fi) {
245
- if (bits_is_compressed(fi->bits)) {
246
- if (bits_is_compressed_brotli(fi->bits)) {
247
- return FRT_COMPRESSION_BROTLI;
248
- } else if (bits_is_compressed_bz2(fi->bits)) {
249
- return FRT_COMPRESSION_BZ2;
250
- } else if (bits_is_compressed_lz4(fi->bits)) {
251
- return FRT_COMPRESSION_LZ4;
252
- } else {
253
- return FRT_COMPRESSION_BROTLI;
254
- }
255
- } else {
256
- return FRT_COMPRESSION_NONE;
257
- }
258
- }
259
-
260
- char *frt_fi_to_s(FrtFieldInfo *fi)
261
- {
262
- const char *fi_name = rb_id2name(fi->name);
263
- char *str = FRT_ALLOC_N(char, strlen(fi_name) + 200);
264
- char *s = str;
265
- s += sprintf(str, "[\"%s\":(%s%s%s%s%s%s%s%s", fi_name,
266
- bits_is_stored(fi->bits) ? "is_stored, " : "",
267
- bits_is_compressed(fi->bits) ? "is_compressed, " : "",
268
- bits_is_indexed(fi->bits) ? "is_indexed, " : "",
269
- bits_is_tokenized(fi->bits) ? "is_tokenized, " : "",
270
- bits_omit_norms(fi->bits) ? "omit_norms, " : "",
271
- bits_store_term_vector(fi->bits) ? "store_term_vector, " : "",
272
- bits_store_positions(fi->bits) ? "store_positions, " : "",
273
- bits_store_offsets(fi->bits) ? "store_offsets, " : "");
274
- s -= 2;
275
- if (*s != ',') {
276
- s += 2;
277
- }
278
-
279
- sprintf(s, ")]");
280
- return str;
281
- }
282
-
283
- /****************************************************************************
284
- *
285
- * FieldInfos
286
- *
287
- ****************************************************************************/
288
-
289
- FrtFieldInfos *frt_fis_alloc(void) {
290
- return FRT_ALLOC(FrtFieldInfos);
291
- }
292
-
293
- FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, unsigned int bits) {
294
- fi_check_params(bits);
295
- fis->field_dict = frt_h_new_ptr((frt_free_ft)&frt_fi_deref);
296
- fis->size = 0;
297
- fis->capa = FIELD_INFOS_INIT_CAPA;
298
- fis->fields = FRT_ALLOC_N(FrtFieldInfo *, fis->capa);
299
- fis->bits = bits;
300
- fis->ref_cnt = 1;
301
- fis->rfis = Qnil;
302
- return fis;
303
- }
304
-
305
- FrtFieldInfos *frt_fis_new(unsigned int bits) {
306
- FrtFieldInfos *fis = frt_fis_alloc();
307
- return frt_fis_init(fis, bits);
308
- }
309
-
310
- FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi) {
311
- if (fis->size == fis->capa) {
312
- fis->capa <<= 1;
313
- FRT_REALLOC_N(fis->fields, FrtFieldInfo *, fis->capa);
314
- }
315
- if (!frt_h_set_safe(fis->field_dict, (void *)fi->name, fi)) {
316
- FRT_RAISE(FRT_ARG_ERROR, "Field :%s already exists", rb_id2name(fi->name));
317
- }
318
- FRT_REF(fi);
319
- fi->number = fis->size;
320
- fis->fields[fis->size] = fi;
321
- fis->size++;
322
- return fi;
323
- }
324
-
325
- FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name) {
326
- return (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
327
- }
328
-
329
- int frt_fis_get_field_num(FrtFieldInfos *fis, ID name) {
330
- FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
331
- if (fi) { return fi->number; }
332
- else { return -1; }
333
- }
334
-
335
- FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name) {
336
- FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
337
- if (!fi) {
338
- fi = (FrtFieldInfo*)frt_fi_new(name, fis->bits);
339
- frt_fis_add_field(fis, fi);
340
- }
341
- return fi;
342
- }
343
-
344
- FrtFieldInfos *frt_fis_read(FrtInStream *is)
345
- {
346
- FrtFieldInfos *volatile fis = NULL;
347
- char *field_name;
348
- FRT_TRY
349
- do {
350
- volatile int i;
351
- union { frt_u32 i; float f; } tmp;
352
- FrtFieldInfo *volatile fi;
353
- fis = frt_fis_new(frt_is_read_vint(is));
354
- for (i = frt_is_read_vint(is); i > 0; i--) {
355
- fi = FRT_ALLOC_AND_ZERO(FrtFieldInfo);
356
- FRT_TRY
357
- field_name = frt_is_read_string_safe(is);
358
- fi->name = rb_intern(field_name);
359
- free(field_name);
360
- tmp.i = frt_is_read_u32(is);
361
- fi->boost = tmp.f;
362
- fi->bits = frt_is_read_vint(is);
363
- FRT_XCATCHALL
364
- free(fi);
365
- FRT_XENDTRY
366
- frt_fis_add_field(fis, fi);
367
- fi->ref_cnt = 1;
368
- }
369
- } while (0);
370
- FRT_XCATCHALL
371
- frt_fis_deref(fis);
372
- FRT_XENDTRY
373
- return fis;
374
- }
375
-
376
- void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os)
377
- {
378
- int i;
379
- union { frt_u32 i; float f; } tmp;
380
- FrtFieldInfo *fi;
381
- const int fis_size = fis->size;
382
-
383
- frt_os_write_vint(os, fis->bits);
384
- frt_os_write_vint(os, fis->size);
385
-
386
- for (i = 0; i < fis_size; i++) {
387
- fi = fis->fields[i];
388
-
389
- frt_os_write_string(os, rb_id2name(fi->name));
390
- tmp.f = fi->boost;
391
- frt_os_write_u32(os, tmp.i);
392
- frt_os_write_vint(os, fi->bits);
393
- }
394
- }
395
-
396
- static const char *store_str[] = {
397
- ":no",
398
- ":yes",
399
- "",
400
- ":compressed"
401
- };
402
-
403
- static const char *fi_store_str(FrtFieldInfo *fi)
404
- {
405
- return store_str[fi->bits & 0x3];
406
- }
407
-
408
- static const char *index_str[] = {
409
- ":no",
410
- ":untokenized",
411
- "",
412
- ":yes",
413
- "",
414
- ":untokenized_omit_norms",
415
- "",
416
- ":omit_norms"
417
- };
418
-
419
- static const char *fi_index_str(FrtFieldInfo *fi)
420
- {
421
- return index_str[(fi->bits >> 2) & 0x7];
422
- }
423
-
424
- static const char *term_vector_str[] = {
425
- ":no",
426
- ":yes",
427
- "",
428
- ":with_positions",
429
- "",
430
- ":with_offsets",
431
- "",
432
- ":with_positions_offsets"
433
- };
434
-
435
- static const char *fi_term_vector_str(FrtFieldInfo *fi)
436
- {
437
- return term_vector_str[(fi->bits >> 5) & 0x7];
438
- }
439
-
440
- char *frt_fis_to_s(FrtFieldInfos *fis)
441
- {
442
- int i, pos, capa = 200 + fis->size * 120;
443
- char *buf = FRT_ALLOC_N(char, capa);
444
- FrtFieldInfo *fi;
445
- const int fis_size = fis->size;
446
-
447
- pos = sprintf(buf,
448
- "default:\n"
449
- " store: %s\n"
450
- " index: %s\n"
451
- " term_vector: %s\n"
452
- "fields:\n",
453
- store_str[fis->bits & 0x3],
454
- index_str[(fis->bits >> 2) & 0x7],
455
- term_vector_str[(fis->bits >> 5) & 0x7]);
456
- for (i = 0; i < fis_size; i++) {
457
- fi = fis->fields[i];
458
- pos += sprintf(buf + pos,
459
- " %s:\n"
460
- " boost: %f\n"
461
- " store: %s\n"
462
- " index: %s\n"
463
- " term_vector: %s\n",
464
- rb_id2name(fi->name), fi->boost, fi_store_str(fi),
465
- fi_index_str(fi), fi_term_vector_str(fi));
466
- }
467
-
468
- return buf;
469
- }
470
-
471
- void frt_fis_deref(FrtFieldInfos *fis) {
472
- if (FRT_DEREF(fis) == 0) {
473
- frt_h_destroy(fis->field_dict);
474
- free(fis->fields);
475
- free(fis);
476
- }
477
- }
478
-
479
- static bool fis_has_vectors(FrtFieldInfos *fis)
480
- {
481
- int i;
482
- const int fis_size = fis->size;
483
-
484
- for (i = 0; i < fis_size; i++) {
485
- if (bits_store_term_vector(fis->fields[i]->bits)) {
486
- return true;
487
- }
488
- }
489
- return false;
490
- }
491
-
492
186
  /****************************************************************************
493
187
  *
494
188
  * SegmentInfo
495
189
  *
496
190
  ****************************************************************************/
497
191
 
498
- FrtSegmentInfo *frt_si_new(char *name, int doc_cnt, FrtStore *store)
499
- {
192
+ FrtSegmentInfo *frt_si_new(char *name, int doc_cnt, FrtStore *store) {
500
193
  FrtSegmentInfo *si = FRT_ALLOC(FrtSegmentInfo);
501
194
  si->name = name;
502
195
  si->doc_cnt = doc_cnt;
@@ -509,8 +202,7 @@ FrtSegmentInfo *frt_si_new(char *name, int doc_cnt, FrtStore *store)
509
202
  return si;
510
203
  }
511
204
 
512
- static FrtSegmentInfo *si_read(FrtStore *store, FrtInStream *is)
513
- {
205
+ static FrtSegmentInfo *si_read(FrtStore *store, FrtInStream *is) {
514
206
  FrtSegmentInfo *volatile si = FRT_ALLOC_AND_ZERO(FrtSegmentInfo);
515
207
  FRT_TRY
516
208
  si->store = store;
@@ -535,8 +227,7 @@ static FrtSegmentInfo *si_read(FrtStore *store, FrtInStream *is)
535
227
  return si;
536
228
  }
537
229
 
538
- static void si_write(FrtSegmentInfo *si, FrtOutStream *os)
539
- {
230
+ static void si_write(FrtSegmentInfo *si, FrtOutStream *os) {
540
231
  frt_os_write_string(os, si->name);
541
232
  frt_os_write_vint(os, si->doc_cnt);
542
233
  frt_os_write_vint(os, si->del_gen);
@@ -558,13 +249,11 @@ void frt_si_close(FrtSegmentInfo *si) {
558
249
  }
559
250
  }
560
251
 
561
- bool frt_si_has_deletions(FrtSegmentInfo *si)
562
- {
252
+ bool frt_si_has_deletions(FrtSegmentInfo *si) {
563
253
  return si->del_gen >= 0;
564
254
  }
565
255
 
566
- void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num)
567
- {
256
+ void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num) {
568
257
  if (field_num >= si->norm_gens_size) {
569
258
  int i;
570
259
  FRT_REALLOC_N(si->norm_gens, int, field_num + 1);
@@ -576,8 +265,7 @@ void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num)
576
265
  si->norm_gens[field_num]++;
577
266
  }
578
267
 
579
- static char *si_norm_file_name(FrtSegmentInfo *si, char *buf, int field_num)
580
- {
268
+ static char *si_norm_file_name(FrtSegmentInfo *si, char *buf, int field_num) {
581
269
  int norm_gen;
582
270
  if (field_num >= si->norm_gens_size
583
271
  || 0 > (norm_gen = si->norm_gens[field_num])) {
@@ -590,38 +278,13 @@ static char *si_norm_file_name(FrtSegmentInfo *si, char *buf, int field_num)
590
278
 
591
279
  void frt_deleter_queue_file(FrtDeleter *dlr, const char *file_name);
592
280
 
593
- static void si_delete_files(FrtSegmentInfo *si, FrtFieldInfos *fis, FrtDeleter *dlr)
594
- {
595
- int i;
596
- char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
597
- size_t seg_len = strlen(si->name);
598
- char *ext;
599
-
600
- for (i = si->norm_gens_size - 1; i >= 0; i--) {
601
- if (0 <= si->norm_gens[i]) {
602
- frt_deleter_queue_file(dlr, si_norm_file_name(si, file_name, fis->fields[i]->number));
603
- }
604
- }
605
-
606
- memcpy(file_name, si->name, seg_len);
607
- file_name[seg_len] = '.';
608
- ext = file_name + seg_len + 1;
609
-
610
- for (i = FRT_NELEMS(INDEX_EXTENSIONS) - 1; i >= 0; i--) {
611
- memcpy(ext, INDEX_EXTENSIONS[i], 4);
612
- frt_deleter_queue_file(dlr, file_name);
613
- }
614
- }
615
-
616
281
  /****************************************************************************
617
282
  *
618
283
  * SegmentInfos
619
284
  *
620
285
  ****************************************************************************/
621
286
 
622
- #include <time.h>
623
- static char *new_segment(frt_i64 generation)
624
- {
287
+ static char *new_segment(frt_i64 generation) {
625
288
  char buf[FRT_SEGMENT_NAME_MAX_LENGTH];
626
289
  char *fn_p = u64_to_str36(buf, FRT_SEGMENT_NAME_MAX_LENGTH - 1,
627
290
  (frt_u64)generation);
@@ -642,8 +305,7 @@ typedef struct FindSegmentsFile {
642
305
  } ret;
643
306
  } FindSegmentsFile;
644
307
 
645
- static void which_gen_i(const char *file_name, void *arg)
646
- {
308
+ static void which_gen_i(const char *file_name, void *arg) {
647
309
  frt_i64 *max_generation = (frt_i64 *)arg;
648
310
  if (0 == strncmp(FRT_SEGMENTS_FILE_NAME"_", file_name,
649
311
  sizeof(FRT_SEGMENTS_FILE_NAME))) {
@@ -689,10 +351,9 @@ void frt_sis_put(FrtSegmentInfos *sis, FILE *stream) {
689
351
  *
690
352
  * @param store - the Store to look in
691
353
  */
692
- frt_i64 frt_sis_current_segment_generation(FrtStore *store)
693
- {
354
+ frt_i64 frt_sis_current_segment_generation(FrtStore *store) {
694
355
  frt_i64 current_generation = -1;
695
- store->each(store, &which_gen_i, &current_generation);
356
+ store->each(store, segm_idx_name, &which_gen_i, &current_generation);
696
357
  return current_generation;
697
358
  }
698
359
 
@@ -703,8 +364,7 @@ frt_i64 frt_sis_current_segment_generation(FrtStore *store)
703
364
  * @param store - the Store to look in
704
365
  * @return segments_N where N is the current generation
705
366
  */
706
- char *frt_sis_curr_seg_file_name(char *buf, FrtStore *store)
707
- {
367
+ char *frt_sis_curr_seg_file_name(char *buf, FrtStore *store) {
708
368
  return segfn_for_generation(buf, frt_sis_current_segment_generation(store));
709
369
  }
710
370
 
@@ -717,16 +377,14 @@ char *frt_sis_curr_seg_file_name(char *buf, FrtStore *store)
717
377
  */
718
378
  /*
719
379
  FIXME: not used
720
- static char *sis_next_seg_file_name(char *buf, FrtStore *store)
721
- {
380
+ static char *sis_next_seg_file_name(char *buf, FrtStore *store) {
722
381
  return segfn_for_generation(buf, frt_sis_current_segment_generation(store) + 1);
723
382
  }
724
383
  */
725
384
 
726
385
  #define GEN_FILE_RETRY_COUNT 10
727
386
  #define GEN_LOOK_AHEAD_COUNT 10
728
- static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void (*run)(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir), FrtIndexReader *ir)
729
- {
387
+ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void (*run)(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir), FrtIndexReader *ir) {
730
388
  volatile int i;
731
389
  volatile int gen_look_ahead_count = 0;
732
390
  volatile bool retry = false;
@@ -763,7 +421,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void
763
421
  FrtInStream *gen_is;
764
422
  gen_is = NULL;
765
423
  FRT_TRY
766
- gen_is = store->open_input(store, SEGMENTS_GEN_FILE_NAME);
424
+ gen_is = store->open_input(store, segm_idx_name, SEGMENTS_GEN_FILE_NAME);
767
425
  FRT_XCATCHALL
768
426
  FRT_HANDLED();
769
427
  /* TODO:LOG "segments open: FRT_IO_ERROR"*/
@@ -813,7 +471,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void
813
471
  * this must be a real error. We throw the original exception
814
472
  * we got. */
815
473
  char *listing, listing_buffer[1024];
816
- listing = frt_store_to_s(store);
474
+ listing = frt_store_folder_to_s(store, segm_idx_name);
817
475
  strncpy(listing_buffer, listing, 1023);
818
476
  listing_buffer[1023] = '\0';
819
477
  free(listing);
@@ -846,7 +504,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void
846
504
  * and try it if so: */
847
505
  char prev_seg_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
848
506
  segfn_for_generation(prev_seg_file_name, gen - 1);
849
- if (store->exists(store, prev_seg_file_name)) {
507
+ if (store->exists(store, segm_idx_name, prev_seg_file_name)) {
850
508
  /* TODO:LOG "fallback to prior segment file '" +
851
509
  * prevSegmentFileName + "'" */
852
510
  FRT_TRY
@@ -870,8 +528,7 @@ static void sis_find_segments_file(FrtStore *store, FindSegmentsFile *fsf, void
870
528
  }
871
529
  }
872
530
 
873
- FrtSegmentInfos *frt_sis_new(FrtFieldInfos *fis)
874
- {
531
+ FrtSegmentInfos *frt_sis_new(FrtFieldInfos *fis) {
875
532
  FrtSegmentInfos *sis = FRT_ALLOC_AND_ZERO(FrtSegmentInfos);
876
533
  FRT_REF(fis);
877
534
  sis->fis = fis;
@@ -885,13 +542,11 @@ FrtSegmentInfos *frt_sis_new(FrtFieldInfos *fis)
885
542
  return sis;
886
543
  }
887
544
 
888
- FrtSegmentInfo *frt_sis_new_segment(FrtSegmentInfos *sis, int doc_cnt, FrtStore *store)
889
- {
545
+ FrtSegmentInfo *frt_sis_new_segment(FrtSegmentInfos *sis, int doc_cnt, FrtStore *store) {
890
546
  return frt_sis_add_si(sis, frt_si_new(new_segment(sis->counter++), doc_cnt, store));
891
547
  }
892
548
 
893
- void frt_sis_destroy(FrtSegmentInfos *sis)
894
- {
549
+ void frt_sis_destroy(FrtSegmentInfos *sis) {
895
550
  int i;
896
551
  const int sis_size = sis->size;
897
552
  for (i = 0; i < sis_size; i++) {
@@ -903,8 +558,7 @@ void frt_sis_destroy(FrtSegmentInfos *sis)
903
558
  free(sis);
904
559
  }
905
560
 
906
- FrtSegmentInfo *frt_sis_add_si(FrtSegmentInfos *sis, FrtSegmentInfo *si)
907
- {
561
+ FrtSegmentInfo *frt_sis_add_si(FrtSegmentInfos *sis, FrtSegmentInfo *si) {
908
562
  if (sis->size >= sis->capa) {
909
563
  sis->capa <<= 1;
910
564
  FRT_REALLOC_N(sis->segs, FrtSegmentInfo *, sis->capa);
@@ -913,8 +567,7 @@ FrtSegmentInfo *frt_sis_add_si(FrtSegmentInfos *sis, FrtSegmentInfo *si)
913
567
  return si;
914
568
  }
915
569
 
916
- void frt_sis_del_at(FrtSegmentInfos *sis, int at)
917
- {
570
+ void frt_sis_del_at(FrtSegmentInfos *sis, int at) {
918
571
  int i;
919
572
  const int sis_size = --(sis->size);
920
573
  frt_si_close(sis->segs[at]);
@@ -923,8 +576,7 @@ void frt_sis_del_at(FrtSegmentInfos *sis, int at)
923
576
  }
924
577
  }
925
578
 
926
- void frt_sis_del_from_to(FrtSegmentInfos *sis, int from, int to)
927
- {
579
+ void frt_sis_del_from_to(FrtSegmentInfos *sis, int from, int to) {
928
580
  int i, num_to_del = to - from;
929
581
  const int sis_size = sis->size -= num_to_del;
930
582
  for (i = from; i < to; i++) {
@@ -935,8 +587,7 @@ void frt_sis_del_from_to(FrtSegmentInfos *sis, int from, int to)
935
587
  }
936
588
  }
937
589
 
938
- static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_)
939
- {
590
+ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_) {
940
591
  int seg_cnt;
941
592
  int i;
942
593
  frt_u32 format = 0;
@@ -947,7 +598,7 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReade
947
598
  segfn_for_generation(seg_file_name, fsf->generation);
948
599
  fsf->ret.sis = NULL;
949
600
  FRT_TRY
950
- is = store->open_input(store, seg_file_name);
601
+ is = store->open_input(store, segm_idx_name, seg_file_name);
951
602
  sis->store = store;
952
603
  FRT_REF(store);
953
604
  sis->generation = fsf->generation;
@@ -973,22 +624,20 @@ static void frt_sis_read_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReade
973
624
  fsf->ret.sis = sis;
974
625
  }
975
626
 
976
- FrtSegmentInfos *frt_sis_read(FrtStore *store)
977
- {
627
+ FrtSegmentInfos *frt_sis_read(FrtStore *store) {
978
628
  FindSegmentsFile fsf;
979
629
  sis_find_segments_file(store, &fsf, &frt_sis_read_i, NULL);
980
630
  return fsf.ret.sis;
981
631
  }
982
632
 
983
- void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter)
984
- {
633
+ void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter) {
985
634
  int i;
986
635
  FrtOutStream *volatile os = NULL;
987
636
  const int sis_size = sis->size;
988
637
  char buf[FRT_SEGMENT_NAME_MAX_LENGTH];
989
638
  sis->generation++;
990
639
  FRT_TRY
991
- os = store->new_output(store, segfn_for_generation(buf, sis->generation));
640
+ os = store->new_output(store, segm_idx_name, segfn_for_generation(buf, sis->generation));
992
641
  frt_os_write_u32(os, FORMAT);
993
642
  frt_os_write_u64(os, ++(sis->version)); /* every write changes the index */
994
643
  frt_os_write_u64(os, sis->counter);
@@ -1002,7 +651,7 @@ void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter)
1002
651
  FRT_XENDTRY
1003
652
 
1004
653
  FRT_TRY
1005
- os = store->new_output(store, SEGMENTS_GEN_FILE_NAME);
654
+ os = store->new_output(store, segm_idx_name, SEGMENTS_GEN_FILE_NAME);
1006
655
  frt_os_write_u64(os, sis->generation);
1007
656
  frt_os_write_u64(os, sis->generation);
1008
657
  FRT_XFINALLY
@@ -1016,15 +665,14 @@ void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter)
1016
665
  }
1017
666
  }
1018
667
 
1019
- static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_)
1020
- {
668
+ static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexReader *ir_) {
1021
669
  FrtInStream *is;
1022
670
  frt_u32 format = 0;
1023
671
  frt_u64 version = 0;
1024
672
  char seg_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
1025
673
 
1026
674
  segfn_for_generation(seg_file_name, (frt_u64)fsf->generation);
1027
- is = store->open_input(store, seg_file_name);
675
+ is = store->open_input(store, segm_idx_name, seg_file_name);
1028
676
 
1029
677
  FRT_TRY
1030
678
  format = frt_is_read_u32(is); // format
@@ -1037,362 +685,12 @@ static void frt_sis_read_ver_i(FrtStore *store, FindSegmentsFile *fsf, FrtIndexR
1037
685
  fsf->ret.uint64 = version;
1038
686
  }
1039
687
 
1040
- frt_u64 frt_sis_read_current_version(FrtStore *store)
1041
- {
688
+ frt_u64 frt_sis_read_current_version(FrtStore *store) {
1042
689
  FindSegmentsFile fsf;
1043
690
  sis_find_segments_file(store, &fsf, &frt_sis_read_ver_i, NULL);
1044
691
  return fsf.ret.uint64;
1045
692
  }
1046
693
 
1047
- /****************************************************************************
1048
- *
1049
- * LazyDocField
1050
- *
1051
- ****************************************************************************/
1052
-
1053
- static FrtLazyDocField *lazy_df_new(ID name, const int size, FrtCompressionType compression) {
1054
- FrtLazyDocField *self = FRT_ALLOC(FrtLazyDocField);
1055
- self->name = name;
1056
- self->size = size;
1057
- self->data = FRT_ALLOC_AND_ZERO_N(FrtLazyDocFieldData, size);
1058
- self->compression = compression;
1059
- self->decompressed = false;
1060
- self->loaded = false;
1061
- return self;
1062
- }
1063
-
1064
- static void lazy_df_destroy(FrtLazyDocField *self) {
1065
- int i;
1066
- for (i = self->size - 1; i >= 0; i--) {
1067
- if (self->data[i].text) {
1068
- free(self->data[i].text);
1069
- }
1070
- }
1071
- free(self->data);
1072
- free(self);
1073
- }
1074
-
1075
- static void comp_raise(void) {
1076
- FRT_RAISE(EXCEPTION, "Compression error");
1077
- }
1078
-
1079
- static char *is_read_brotli_compressed_bytes(FrtInStream *is, int compressed_len, int *len) {
1080
- int buf_out_idx = 0;
1081
- int read_len;
1082
- frt_uchar buf_in[FRT_COMPRESSION_BUFFER_SIZE];
1083
- const frt_uchar *next_in;
1084
- size_t available_in;
1085
- frt_uchar *buf_out = NULL;
1086
- frt_uchar *next_out;
1087
- size_t available_out;
1088
-
1089
- BrotliDecoderState *b_state = BrotliDecoderCreateInstance(NULL, NULL, NULL);
1090
- BrotliDecoderResult b_result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
1091
- if (!b_state) { comp_raise(); return NULL; }
1092
-
1093
- do {
1094
- read_len = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
1095
- frt_is_read_bytes(is, buf_in, read_len);
1096
- compressed_len -= read_len;
1097
- available_in = read_len;
1098
- next_in = buf_in;
1099
- available_out = FRT_COMPRESSION_BUFFER_SIZE;
1100
- do {
1101
- FRT_REALLOC_N(buf_out, frt_uchar, buf_out_idx + FRT_COMPRESSION_BUFFER_SIZE);
1102
- next_out = buf_out + buf_out_idx;
1103
- b_result = BrotliDecoderDecompressStream(b_state,
1104
- &available_in, &next_in,
1105
- &available_out, &next_out, NULL);
1106
- if (b_result == BROTLI_DECODER_RESULT_ERROR) { comp_raise(); return NULL; }
1107
- buf_out_idx += FRT_COMPRESSION_BUFFER_SIZE - available_out;
1108
- } while (b_result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT);
1109
- } while (b_result != BROTLI_DECODER_RESULT_SUCCESS && compressed_len > 0);
1110
-
1111
- BrotliDecoderDestroyInstance(b_state);
1112
-
1113
- FRT_REALLOC_N(buf_out, frt_uchar, buf_out_idx + 1);
1114
- buf_out[buf_out_idx] = '\0';
1115
- *len = buf_out_idx;
1116
- return (char *)buf_out;
1117
- }
1118
-
1119
- static void zraise(int ret) {
1120
- switch (ret) {
1121
- case BZ_IO_ERROR:
1122
- if (ferror(stdin))
1123
- FRT_RAISE(FRT_IO_ERROR, "bzlib: error reading stdin");
1124
- if (ferror(stdout))
1125
- FRT_RAISE(FRT_IO_ERROR, "bzlib: error writing stdout");
1126
- break;
1127
- case BZ_CONFIG_ERROR:
1128
- FRT_RAISE(FRT_IO_ERROR, "bzlib: system configuration error");
1129
- break;
1130
- case BZ_SEQUENCE_ERROR: /* shouldn't occur if code is correct */
1131
- FRT_RAISE(FRT_IO_ERROR, "bzlib: !!BUG!! sequence error");
1132
- break;
1133
- case BZ_PARAM_ERROR: /* shouldn't occur if code is correct */
1134
- FRT_RAISE(FRT_IO_ERROR, "bzlib: !!BUG!! parameter error");
1135
- break;
1136
- case BZ_MEM_ERROR:
1137
- FRT_RAISE(FRT_IO_ERROR, "bzlib: memory error");
1138
- break;
1139
- case BZ_DATA_ERROR:
1140
- FRT_RAISE(FRT_IO_ERROR, "bzlib: data integrity check error");
1141
- break;
1142
- case BZ_DATA_ERROR_MAGIC:
1143
- FRT_RAISE(FRT_IO_ERROR, "bzlib: data integrity check - non-matching magic");
1144
- break;
1145
- case BZ_UNEXPECTED_EOF:
1146
- FRT_RAISE(FRT_IO_ERROR, "bzlib: unexpected end-of-file");
1147
- break;
1148
- case BZ_OUTBUFF_FULL:
1149
- FRT_RAISE(FRT_IO_ERROR, "bzlib: output buffer full");
1150
- break;
1151
- default:
1152
- FRT_RAISE(FRT_EXCEPTION, "bzlib: unknown error");
1153
- }
1154
- }
1155
-
1156
- static char *is_read_bz2_compressed_bytes(FrtInStream *is, int compressed_len, int *len) {
1157
- int buf_out_idx = 0, ret, read_len;
1158
- char *buf_out = NULL;
1159
- char buf_in[FRT_COMPRESSION_BUFFER_SIZE];
1160
- bz_stream zstrm;
1161
- zstrm.bzalloc = NULL;
1162
- zstrm.bzfree = NULL;
1163
- zstrm.opaque = NULL;
1164
- zstrm.next_in = NULL;
1165
- zstrm.avail_in = 0;
1166
- if ((ret = BZ2_bzDecompressInit(&zstrm, 0, 0)) != BZ_OK) zraise(ret);
1167
-
1168
- do {
1169
- read_len = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
1170
- frt_is_read_bytes(is, (frt_uchar *)buf_in, read_len);
1171
- compressed_len -= read_len;
1172
- zstrm.avail_in = read_len;
1173
- zstrm.next_in = buf_in;
1174
- zstrm.avail_out = FRT_COMPRESSION_BUFFER_SIZE;
1175
-
1176
- do {
1177
- REALLOC_N(buf_out, char, buf_out_idx + FRT_COMPRESSION_BUFFER_SIZE);
1178
- zstrm.next_out = buf_out + buf_out_idx;
1179
- ret = BZ2_bzDecompress(&zstrm);
1180
- assert(ret != BZ_SEQUENCE_ERROR); /* state not clobbered */
1181
- if (ret != BZ_OK && ret != BZ_STREAM_END) {
1182
- (void)BZ2_bzDecompressEnd(&zstrm);
1183
- zraise(ret);
1184
- }
1185
- buf_out_idx += FRT_COMPRESSION_BUFFER_SIZE - zstrm.avail_out;
1186
- } while (zstrm.avail_out == 0);
1187
- } while (ret != BZ_STREAM_END && compressed_len != 0);
1188
-
1189
- (void)BZ2_bzDecompressEnd(&zstrm);
1190
-
1191
- FRT_REALLOC_N(buf_out, char, buf_out_idx + 1);
1192
- buf_out[buf_out_idx] = '\0';
1193
-
1194
- *len = buf_out_idx;
1195
- return (char *)buf_out;
1196
- }
1197
-
1198
- static char *is_read_lz4_compressed_bytes(FrtInStream *is, int compressed_len, int *length) {
1199
- frt_uchar buf_in[FRT_COMPRESSION_BUFFER_SIZE];
1200
- char *buf_out = NULL;
1201
- int dc_length = 0;
1202
- LZ4F_dctx *dctx;
1203
- LZ4F_frameInfo_t frame_info;
1204
- LZ4F_errorCode_t dctx_status = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION);
1205
- if (LZ4F_isError(dctx_status)) { *length = -1; return NULL; }
1206
-
1207
- /* header and buffer */
1208
- int read_length = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
1209
- frt_is_read_bytes(is, buf_in, read_length);
1210
- compressed_len -= read_length;
1211
-
1212
- size_t consumed_size = read_length;
1213
- size_t res = LZ4F_getFrameInfo(dctx, &frame_info, buf_in, &consumed_size);
1214
- if (LZ4F_isError(res)) { *length = -1; return NULL; }
1215
- size_t buf_out_length;
1216
- switch(frame_info.blockSizeID) {
1217
- case LZ4F_default:
1218
- case LZ4F_max64KB:
1219
- buf_out_length = 1 << 16;
1220
- break;
1221
- case LZ4F_max256KB:
1222
- buf_out_length = 1 << 18;
1223
- break;
1224
- case LZ4F_max1MB:
1225
- buf_out_length = 1 << 20;
1226
- break;
1227
- case LZ4F_max4MB:
1228
- buf_out_length = 1 << 22;
1229
- break;
1230
- default:
1231
- buf_out_length = 0;
1232
- }
1233
-
1234
- res = 1;
1235
- int first_chunk = 1;
1236
-
1237
- /* decompress data */
1238
- while (res != 0) {
1239
- if (!first_chunk) {
1240
- read_length = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
1241
- frt_is_read_bytes(is, buf_in, read_length);
1242
- compressed_len -= read_length;
1243
- consumed_size = 0;
1244
- }
1245
- first_chunk = 0;
1246
-
1247
- char *src = (char *)(buf_in + consumed_size);
1248
- char *src_end = (char *)buf_in + read_length;
1249
-
1250
- while (src < src_end && res != 0){
1251
- size_t dest_length = buf_out_length;
1252
- size_t consumed_size = read_length;
1253
- FRT_REALLOC_N(buf_out, char, dc_length + buf_out_length);
1254
- res = LZ4F_decompress(dctx, buf_out + dc_length, &dest_length, src, &consumed_size, NULL);
1255
- if (LZ4F_isError(res)) { *length = -1; return NULL; }
1256
- dc_length += dest_length;
1257
- src = src + consumed_size;
1258
- }
1259
- }
1260
-
1261
- /* finish up */
1262
- LZ4F_freeDecompressionContext(dctx);
1263
-
1264
- FRT_REALLOC_N(buf_out, char, dc_length + 1);
1265
- buf_out[dc_length] = '\0';
1266
-
1267
- *length = dc_length;
1268
- return buf_out;
1269
- }
1270
-
1271
- static char *is_read_compressed_bytes(FrtInStream *is, int compressed_len, int *len, FrtCompressionType compression) {
1272
- switch (compression) {
1273
- case FRT_COMPRESSION_BROTLI:
1274
- return is_read_brotli_compressed_bytes(is, compressed_len, len);
1275
- case FRT_COMPRESSION_BZ2:
1276
- return is_read_bz2_compressed_bytes(is, compressed_len, len);
1277
- case FRT_COMPRESSION_LZ4:
1278
- return is_read_lz4_compressed_bytes(is, compressed_len, len);
1279
- default:
1280
- return NULL;
1281
- }
1282
- }
1283
-
1284
- char *frt_lazy_df_get_data(FrtLazyDocField *self, int i) {
1285
- char *text = NULL;
1286
- if (i < self->size && i >= 0) {
1287
- text = self->data[i].text;
1288
- if (NULL == text) {
1289
- const int read_len = self->data[i].length + 1;
1290
- frt_is_seek(self->doc->fields_in, self->data[i].start);
1291
- if (self->data[i].compression != FRT_COMPRESSION_NONE) {
1292
- self->data[i].text = text = is_read_compressed_bytes(self->doc->fields_in, read_len, &(self->data[i].length), self->data[i].compression);
1293
- } else {
1294
- self->data[i].text = text = FRT_ALLOC_N(char, read_len);
1295
- frt_is_read_bytes(self->doc->fields_in, (frt_uchar *)text, read_len);
1296
- text[read_len - 1] = '\0';
1297
- }
1298
- self->loaded = true;
1299
- }
1300
- }
1301
-
1302
- return text;
1303
- }
1304
-
1305
- void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf, int start, int len) {
1306
- if (self->compression != FRT_COMPRESSION_NONE && !self->decompressed) {
1307
- int i;
1308
- self->len = 0;
1309
- for (i = self->size-1; i >= 0; i--) {
1310
- (void)frt_lazy_df_get_data(self, i);
1311
- self->len += self->data[i].length + 1;
1312
- }
1313
- self->len--; /* each field separated by ' ' but no need to add to end */
1314
- self->decompressed = true;
1315
- }
1316
- if (start < 0 || start >= self->len) {
1317
- FRT_RAISE(FRT_IO_ERROR, "start out of range in LazyDocField#get_bytes. %d "
1318
- "is not between 0 and %d", start, self->len);
1319
- }
1320
- if (len <= 0) {
1321
- FRT_RAISE(FRT_IO_ERROR, "len = %d, but should be greater than 0", len);
1322
- }
1323
- if (start + len > self->len) {
1324
- FRT_RAISE(FRT_IO_ERROR, "Tried to read past end of field. Field is only %d "
1325
- "bytes long but tried to read to %d", self->len, start + len);
1326
- }
1327
- if (self->compression != FRT_COMPRESSION_NONE) {
1328
- int cur_start = 0, buf_start = 0, cur_end, i, copy_start, copy_len;
1329
- for (i = 0; i < self->size; i++) {
1330
- cur_end = cur_start + self->data[i].length;
1331
- if (start < cur_end) {
1332
- copy_start = start > cur_start ? start - cur_start : 0;
1333
- copy_len = cur_end - cur_start - copy_start;
1334
- if (copy_len >= len) {
1335
- copy_len = len;
1336
- len = 0;
1337
- }
1338
- else {
1339
- len -= copy_len;
1340
- }
1341
- memcpy(buf + buf_start,
1342
- self->data[i].text + copy_start,
1343
- copy_len);
1344
- buf_start += copy_len;
1345
- if (len > 0) {
1346
- buf[buf_start++] = ' ';
1347
- len--;
1348
- }
1349
- if (len == 0) break;
1350
- }
1351
- cur_start = cur_end + 1;
1352
- }
1353
- } else {
1354
- frt_is_seek(self->doc->fields_in, self->data[0].start + start);
1355
- frt_is_read_bytes(self->doc->fields_in, (frt_uchar *)buf, len);
1356
- }
1357
- }
1358
-
1359
- /****************************************************************************
1360
- *
1361
- * LazyDoc
1362
- *
1363
- ****************************************************************************/
1364
-
1365
- static FrtLazyDoc *lazy_doc_new(int size, FrtInStream *fdt_in)
1366
- {
1367
- FrtLazyDoc *self = FRT_ALLOC(FrtLazyDoc);
1368
- self->field_dictionary = frt_h_new_ptr((frt_free_ft)&lazy_df_destroy);
1369
- self->size = size;
1370
- self->fields = FRT_ALLOC_AND_ZERO_N(FrtLazyDocField *, size);
1371
- self->fields_in = frt_is_clone(fdt_in);
1372
- self->loaded = false;
1373
- return self;
1374
- }
1375
-
1376
- void frt_lazy_doc_close(FrtLazyDoc *self)
1377
- {
1378
- frt_h_destroy(self->field_dictionary);
1379
- frt_is_close(self->fields_in);
1380
- free(self->fields);
1381
- free(self);
1382
- }
1383
-
1384
- static void lazy_doc_add_field(FrtLazyDoc *self, FrtLazyDocField *lazy_df, int i)
1385
- {
1386
- self->fields[i] = lazy_df;
1387
-
1388
- frt_h_set(self->field_dictionary, (void *)lazy_df->name, lazy_df);
1389
- lazy_df->doc = self;
1390
- }
1391
-
1392
- FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self, ID field) {
1393
- return (FrtLazyDocField *)frt_h_get(self->field_dictionary, (void *)field);
1394
- }
1395
-
1396
694
  /****************************************************************************
1397
695
  * FrtFieldsReader
1398
696
  ****************************************************************************/
@@ -1409,9 +707,9 @@ FrtFieldsReader *frt_fr_open(FrtStore *store, const char *segment, FrtFieldInfos
1409
707
  fr->fis = fis;
1410
708
 
1411
709
  strcpy(file_name + segment_len, ".fdt");
1412
- fr->fdt_in = store->open_input(store, file_name);
710
+ fr->fdt_in = store->open_input(store, segm_idx_name, file_name);
1413
711
  strcpy(file_name + segment_len, ".fdx");
1414
- fr->fdx_in = store->open_input(store, file_name);
712
+ fr->fdx_in = store->open_input(store, segm_idx_name, file_name);
1415
713
  fr->size = frt_is_length(fr->fdx_in) / FIELDS_IDX_PTR_SIZE;
1416
714
  fr->store = store;
1417
715
  FRT_REF(store);
@@ -1437,32 +735,30 @@ void frt_fr_close(FrtFieldsReader *fr) {
1437
735
  free(fr);
1438
736
  }
1439
737
 
1440
- static FrtDocField *frt_fr_df_new(ID name, int size, FrtCompressionType compression) {
738
+ static FrtDocField *frt_fr_df_new(ID name, int size, FrtCompressionType compression_type) {
1441
739
  FrtDocField *df = FRT_ALLOC(FrtDocField);
1442
740
  df->name = name;
1443
741
  df->capa = df->size = size;
1444
- df->data = FRT_ALLOC_N(char *, df->capa);
742
+ df->data = FRT_ALLOC_N(const char *, df->capa);
1445
743
  df->lengths = FRT_ALLOC_N(int, df->capa);
1446
744
  df->encodings = FRT_ALLOC_N(rb_encoding *, df->capa);
1447
- df->destroy_data = true;
1448
745
  df->boost = 1.0f;
1449
- df->compression = compression;
746
+ df->compression_type = compression_type;
1450
747
  return df;
1451
748
  }
1452
749
 
1453
- static void frt_fr_read_compressed_fields(FrtFieldsReader *fr, FrtDocField *df, FrtCompressionType compression) {
750
+ static void frt_fr_read_compressed_fields(FrtFieldsReader *fr, FrtDocField *df, FrtCompressionType compression_type) {
1454
751
  int i;
1455
752
  const int df_size = df->size;
1456
753
  FrtInStream *fdt_in = fr->fdt_in;
1457
754
 
1458
755
  for (i = 0; i < df_size; i++) {
1459
- const int compressed_len = df->lengths[i] + 1;
1460
- df->data[i] = is_read_compressed_bytes(fdt_in, compressed_len, &(df->lengths[i]), compression);
756
+ const int compressed_len = df->lengths[i];
757
+ df->data[i] = frt_is_read_compressed_bytes(fdt_in, compressed_len, &(df->lengths[i]), compression_type);
1461
758
  }
1462
759
  }
1463
760
 
1464
- FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
1465
- {
761
+ FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num) {
1466
762
  int i, j;
1467
763
  frt_off_t pos;
1468
764
  int stored_cnt;
@@ -1478,28 +774,29 @@ FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
1478
774
  for (i = 0; i < stored_cnt; i++) {
1479
775
  const int field_num = frt_is_read_vint(fdt_in);
1480
776
  FrtFieldInfo *fi = fr->fis->fields[field_num];
1481
- const int df_size = frt_is_read_vint(fdt_in);
1482
- FrtDocField *df = frt_fr_df_new(fi->name, df_size, frt_fi_get_compression(fi));
777
+ const int df_field_count = frt_is_read_vint(fdt_in);
778
+ FrtDocField *df = frt_fr_df_new(fi->name, df_field_count, bits_get_compression_type(fi->bits));
1483
779
 
1484
- for (j = 0; j < df_size; j++) {
780
+ for (j = 0; j < df_field_count; j++) {
1485
781
  df->lengths[j] = frt_is_read_vint(fdt_in);
1486
782
  df->encodings[j] = rb_enc_from_index(frt_is_read_vint(fdt_in));
1487
- df->compression = frt_is_read_vint(fdt_in);
783
+ df->compression_type = frt_is_read_vint(fdt_in);
1488
784
  }
1489
785
 
1490
786
  frt_doc_add_field(doc, df);
1491
787
  }
1492
788
  for (i = 0; i < stored_cnt; i++) {
1493
789
  FrtDocField *df = doc->fields[i];
1494
- if (df->compression != FRT_COMPRESSION_NONE) {
1495
- frt_fr_read_compressed_fields(fr, df, df->compression);
790
+ if (df->compression_type != FRT_COMPRESSION_NONE) {
791
+ frt_fr_read_compressed_fields(fr, df, df->compression_type);
1496
792
  } else {
1497
793
  const int df_size = df->size;
1498
794
  for (j = 0; j < df_size; j++) {
1499
- const int read_len = df->lengths[j] + 1;
1500
- df->data[j] = FRT_ALLOC_N(char, read_len);
1501
- frt_is_read_bytes(fdt_in, (frt_uchar *)df->data[j], read_len);
1502
- df->data[j][read_len - 1] = '\0';
795
+ const int read_len = df->lengths[j];
796
+ char *d = FRT_ALLOC_N(char, read_len + 1);
797
+ frt_is_read_bytes(fdt_in, (frt_uchar *)d, read_len);
798
+ d[read_len] = '\0';
799
+ df->data[j] = d;
1503
800
  }
1504
801
  }
1505
802
  }
@@ -1507,8 +804,7 @@ FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
1507
804
  return doc;
1508
805
  }
1509
806
 
1510
- FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num)
1511
- {
807
+ FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num) {
1512
808
  int start = 0;
1513
809
  int i, j;
1514
810
  frt_off_t pos;
@@ -1522,23 +818,23 @@ FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num)
1522
818
  frt_is_seek(fdt_in, pos);
1523
819
  stored_cnt = frt_is_read_vint(fdt_in);
1524
820
 
1525
- lazy_doc = lazy_doc_new(stored_cnt, fdt_in);
821
+ lazy_doc = frt_lazy_doc_new(stored_cnt, fdt_in);
1526
822
  for (i = 0; i < stored_cnt; i++) {
1527
823
  FrtFieldInfo *fi = fr->fis->fields[frt_is_read_vint(fdt_in)];
1528
824
  const int df_size = frt_is_read_vint(fdt_in);
1529
- FrtLazyDocField *lazy_df = lazy_df_new(fi->name, df_size, frt_fi_get_compression(fi));
825
+ FrtLazyDocField *lazy_df = frt_lazy_df_new(fi->name, df_size, bits_get_compression_type(fi->bits));
1530
826
  const int field_start = start;
1531
827
  /* get the starts relative positions this time around */
1532
828
 
1533
829
  for (j = 0; j < df_size; j++) {
1534
830
  lazy_df->data[j].start = start;
1535
- start += 1 + (lazy_df->data[j].length = frt_is_read_vint(fdt_in));
831
+ start += (lazy_df->data[j].length = frt_is_read_vint(fdt_in));
1536
832
  lazy_df->data[j].encoding = rb_enc_from_index(frt_is_read_vint(fdt_in));
1537
- lazy_df->data[j].compression = frt_is_read_vint(fdt_in);
833
+ lazy_df->data[j].compression_type = frt_is_read_vint(fdt_in);
1538
834
  }
1539
835
 
1540
- lazy_df->len = start - field_start - 1;
1541
- lazy_doc_add_field(lazy_doc, lazy_df, i);
836
+ lazy_df->len = start - field_start;
837
+ frt_lazy_doc_add_field(lazy_doc, lazy_df, i);
1542
838
  }
1543
839
  /* correct the starts to their correct absolute positions */
1544
840
  const frt_off_t abs_start = frt_is_pos(fdt_in);
@@ -1553,8 +849,7 @@ FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num)
1553
849
  return lazy_doc;
1554
850
  }
1555
851
 
1556
- static FrtTermVector *frt_fr_read_term_vector(FrtFieldsReader *fr, int field_num)
1557
- {
852
+ static FrtTermVector *frt_fr_read_term_vector(FrtFieldsReader *fr, int field_num) {
1558
853
  FrtTermVector *tv = FRT_ALLOC_AND_ZERO(FrtTermVector);
1559
854
  FrtInStream *fdt_in = fr->fdt_in;
1560
855
  FrtFieldInfo *fi = fr->fis->fields[field_num];
@@ -1612,8 +907,7 @@ static FrtTermVector *frt_fr_read_term_vector(FrtFieldsReader *fr, int field_num
1612
907
  return tv;
1613
908
  }
1614
909
 
1615
- FrtHash *frt_fr_get_tv(FrtFieldsReader *fr, int doc_num)
1616
- {
910
+ FrtHash *frt_fr_get_tv(FrtFieldsReader *fr, int doc_num) {
1617
911
  FrtHash *term_vectors = frt_h_new_ptr((frt_free_ft)&frt_tv_destroy);
1618
912
  int i;
1619
913
  FrtInStream *fdx_in = fr->fdx_in;
@@ -1699,10 +993,10 @@ FrtFieldsWriter *frt_fw_open(FrtStore *store, const char *segment, FrtFieldInfos
1699
993
  memcpy(file_name, segment, segment_len);
1700
994
 
1701
995
  strcpy(file_name + segment_len, ".fdt");
1702
- fw->fdt_out = store->new_output(store, file_name);
996
+ fw->fdt_out = store->new_output(store, segm_idx_name, file_name);
1703
997
 
1704
998
  strcpy(file_name + segment_len, ".fdx");
1705
- fw->fdx_out = store->new_output(store, file_name);
999
+ fw->fdx_out = store->new_output(store, segm_idx_name, file_name);
1706
1000
 
1707
1001
  fw->buffer = frt_ram_new_buffer();
1708
1002
 
@@ -1720,152 +1014,13 @@ void frt_fw_close(FrtFieldsWriter *fw) {
1720
1014
  free(fw);
1721
1015
  }
1722
1016
 
1723
- static int frt_os_write_brotli_compressed_bytes(FrtOutStream* out_stream, frt_uchar *data, int length) {
1724
- size_t compressed_length = 0;
1725
- const frt_uchar *next_in = data;
1726
- size_t available_in = length;
1727
- size_t available_out;
1728
- frt_uchar compression_buffer[FRT_COMPRESSION_BUFFER_SIZE];
1729
- frt_uchar *next_out;
1730
- BrotliEncoderState *b_state = BrotliEncoderCreateInstance(NULL, NULL, NULL);
1731
- if (!b_state) { comp_raise(); return -1; }
1732
-
1733
- BrotliEncoderSetParameter(b_state, BROTLI_PARAM_QUALITY, FRT_BROTLI_COMPRESSION_LEVEL);
1734
-
1735
- do {
1736
- available_out = FRT_COMPRESSION_BUFFER_SIZE;
1737
- next_out = compression_buffer;
1738
- if (!BrotliEncoderCompressStream(b_state, BROTLI_OPERATION_FINISH,
1739
- &available_in, &next_in,
1740
- &available_out, &next_out, &compressed_length)) {
1741
- BrotliEncoderDestroyInstance(b_state);
1742
- comp_raise();
1743
- return -1;
1744
- }
1745
- frt_os_write_bytes(out_stream, compression_buffer, FRT_COMPRESSION_BUFFER_SIZE - available_out);
1746
- } while (!BrotliEncoderIsFinished(b_state));
1747
-
1748
- BrotliEncoderDestroyInstance(b_state);
1749
-
1750
- return (int)compressed_length;
1751
- }
1752
-
1753
- static int frt_os_write_bz2_compressed_bytes(FrtOutStream* out_stream, frt_uchar *data, int length) {
1754
- int ret, buf_size, compressed_len = 0;
1755
- char out_buffer[FRT_COMPRESSION_BUFFER_SIZE];
1756
- bz_stream zstrm;
1757
- zstrm.bzalloc = NULL;
1758
- zstrm.bzfree = NULL;
1759
- zstrm.opaque = NULL;
1760
- if ((ret = BZ2_bzCompressInit(&zstrm, FRT_BZIP_COMPRESSION_LEVEL, 0, 0)) != BZ_OK) zraise(ret);
1761
-
1762
- zstrm.avail_in = length;
1763
- zstrm.next_in = (char *)data;
1764
- zstrm.avail_out = FRT_COMPRESSION_BUFFER_SIZE;
1765
- zstrm.next_out = out_buffer;
1766
-
1767
- do {
1768
- ret = BZ2_bzCompress(&zstrm, BZ_FINISH); /* no bad return value */
1769
- assert(ret != BZ_SEQUENCE_ERROR); /* state not clobbered */
1770
- compressed_len += buf_size = FRT_COMPRESSION_BUFFER_SIZE - zstrm.avail_out;
1771
- frt_os_write_bytes(out_stream, (frt_uchar *)out_buffer, buf_size);
1772
- } while (zstrm.avail_out == 0);
1773
- assert(zstrm.avail_in == 0); /* all input will be used */
1774
-
1775
- (void)BZ2_bzCompressEnd(&zstrm);
1776
- return compressed_len;
1777
- }
1778
-
1779
- static const LZ4F_preferences_t lz4_prefs = {
1780
- {
1781
- LZ4F_default,
1782
- LZ4F_blockLinked,
1783
- LZ4F_noContentChecksum,
1784
- LZ4F_frame,
1785
- 0, /* unknown content size */
1786
- 0, /* no dictID */
1787
- LZ4F_noBlockChecksum
1788
- },
1789
- 0,
1790
- 1,
1791
- 1,
1792
- {0,0,0}
1793
- };
1794
-
1795
- static int frt_os_write_lz4_compressed_bytes(FrtOutStream* out_stream, frt_uchar *data, int length) {
1796
- int compressed_length = 0;
1797
- int remaining_length = length;
1798
- size_t ccmp_length = 0;
1799
- LZ4F_compressionContext_t ctx;
1800
- size_t out_buf_length = LZ4F_compressBound(FRT_COMPRESSION_BUFFER_SIZE, &lz4_prefs);
1801
- frt_uchar *out_buf = frt_ecalloc(out_buf_length);
1802
-
1803
- size_t ctx_creation = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
1804
- if (LZ4F_isError(ctx_creation)) {
1805
- compressed_length = -1;
1806
- goto finish;
1807
- }
1808
-
1809
- /* create header */
1810
- ccmp_length = LZ4F_compressBegin(ctx, out_buf, out_buf_length, &lz4_prefs);
1811
- if (LZ4F_isError(ccmp_length)) {
1812
- compressed_length = -1;
1813
- goto finish;
1814
- }
1815
- compressed_length = ccmp_length;
1816
- frt_os_write_bytes(out_stream, out_buf, ccmp_length);
1817
-
1818
- /* compress data */
1819
- do {
1820
- int read_length = (FRT_COMPRESSION_BUFFER_SIZE > remaining_length) ? remaining_length : FRT_COMPRESSION_BUFFER_SIZE;
1821
- ccmp_length = LZ4F_compressUpdate(ctx, out_buf, out_buf_length, data + (length - remaining_length), read_length, NULL);
1822
- if (LZ4F_isError(ccmp_length)) {
1823
- compressed_length = -1;
1824
- goto finish;
1825
- }
1826
- frt_os_write_bytes(out_stream, out_buf, ccmp_length);
1827
- compressed_length += ccmp_length;
1828
- remaining_length -= read_length;
1829
- } while (remaining_length > 0);
1830
-
1831
- /* finish up */
1832
- ccmp_length = LZ4F_compressEnd(ctx, out_buf, out_buf_length, NULL);
1833
- if (LZ4F_isError(ccmp_length)) {
1834
- compressed_length = -1;
1835
- goto finish;
1836
- }
1837
-
1838
- frt_os_write_bytes(out_stream, out_buf, ccmp_length);
1839
- compressed_length += ccmp_length;
1840
-
1841
- finish:
1842
- LZ4F_freeCompressionContext(ctx);
1843
- free(out_buf);
1844
-
1845
- return compressed_length;
1846
- }
1847
-
1848
- static int frt_os_write_compressed_bytes(FrtOutStream* out_stream, frt_uchar *data, int length, FrtCompressionType compression) {
1849
- switch (compression) {
1850
- case FRT_COMPRESSION_BROTLI:
1851
- return frt_os_write_brotli_compressed_bytes(out_stream, data, length);
1852
- case FRT_COMPRESSION_BZ2:
1853
- return frt_os_write_bz2_compressed_bytes(out_stream, data, length);
1854
- case FRT_COMPRESSION_LZ4:
1855
- return frt_os_write_lz4_compressed_bytes(out_stream, data, length);
1856
- default:
1857
- return -1;
1858
- }
1859
-
1860
- }
1861
-
1862
1017
  void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc) {
1863
1018
  int i, j, stored_cnt = 0;
1864
1019
  FrtDocField *df;
1865
1020
  FrtFieldInfo *fi;
1866
- FrtCompressionType compression;
1021
+ FrtCompressionType compression_type;
1867
1022
  FrtOutStream *fdt_out = fw->fdt_out, *fdx_out = fw->fdx_out;
1868
- const int doc_size = doc->size;
1023
+ const int doc_size = doc->field_count;
1869
1024
 
1870
1025
  for (i = 0; i < doc_size; i++) {
1871
1026
  df = doc->fields[i];
@@ -1889,23 +1044,20 @@ void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc) {
1889
1044
  frt_os_write_vint(fdt_out, df_size);
1890
1045
 
1891
1046
  if (bits_is_compressed(fi->bits)) {
1892
- compression = frt_fi_get_compression(fi);
1047
+ compression_type = bits_get_compression_type(fi->bits);
1893
1048
  for (j = 0; j < df_size; j++) {
1894
- const int length = df->lengths[j];
1895
- int compressed_len = frt_os_write_compressed_bytes(fw->buffer, (frt_uchar*)df->data[j], length, compression);
1896
- frt_os_write_vint(fdt_out, compressed_len - 1);
1049
+ int compressed_len = frt_os_write_compressed_bytes(fw->buffer, (frt_uchar*)df->data[j], df->lengths[j], compression_type);
1050
+ frt_os_write_vint(fdt_out, compressed_len);
1897
1051
  frt_os_write_vint(fdt_out, rb_enc_to_index(df->encodings[j]));
1898
- frt_os_write_vint(fdt_out, compression);
1052
+ frt_os_write_vint(fdt_out, compression_type);
1899
1053
  }
1900
1054
  } else {
1901
1055
  for (j = 0; j < df_size; j++) {
1902
1056
  const int length = df->lengths[j];
1057
+ frt_os_write_bytes(fw->buffer, (frt_uchar*)df->data[j], length);
1903
1058
  frt_os_write_vint(fdt_out, length);
1904
1059
  frt_os_write_vint(fdt_out, rb_enc_to_index(df->encodings[j]));
1905
- frt_os_write_vint(fdt_out, FRT_COMPRESSION_NONE);
1906
- frt_os_write_bytes(fw->buffer, (frt_uchar*)df->data[j], length);
1907
- /* leave a space between fields as that is how they are analyzed */
1908
- frt_os_write_byte(fw->buffer, ' ');
1060
+ frt_os_write_vint(fdt_out, FRT_COMPRESSION_NONE);
1909
1061
  }
1910
1062
  }
1911
1063
  }
@@ -1932,8 +1084,7 @@ void frt_fw_add_postings(FrtFieldsWriter *fw,
1932
1084
  FrtPostingList **plists,
1933
1085
  int posting_count,
1934
1086
  FrtOffset *offsets,
1935
- int offset_count)
1936
- {
1087
+ int offset_count) {
1937
1088
  int i, delta_start, delta_length;
1938
1089
  const char *last_term = FRT_EMPTY_STRING;
1939
1090
  FrtOutStream *fdt_out = fw->fdt_out;
@@ -1997,19 +1148,16 @@ void frt_fw_add_postings(FrtFieldsWriter *fw,
1997
1148
 
1998
1149
  #define TE(ste) ((FrtTermEnum *)ste)
1999
1150
 
2000
- char *frt_te_get_term(FrtTermEnum *te)
2001
- {
1151
+ char *frt_te_get_term(FrtTermEnum *te) {
2002
1152
  return (char *)memcpy(FRT_ALLOC_N(char, te->curr_term_len + 1),
2003
1153
  te->curr_term, te->curr_term_len + 1);
2004
1154
  }
2005
1155
 
2006
- FrtTermInfo *frt_te_get_ti(FrtTermEnum *te)
2007
- {
1156
+ FrtTermInfo *frt_te_get_ti(FrtTermEnum *te) {
2008
1157
  return (FrtTermInfo*)memcpy(FRT_ALLOC(FrtTermInfo), &(te->curr_ti), sizeof(FrtTermInfo));
2009
1158
  }
2010
1159
 
2011
- static char *te_skip_to(FrtTermEnum *te, const char *term)
2012
- {
1160
+ static char *te_skip_to(FrtTermEnum *te, const char *term) {
2013
1161
  char *curr_term = te->curr_term;
2014
1162
  if (strcmp(curr_term, term) < 0) {
2015
1163
  while (NULL != ((curr_term = te->next(te)))
@@ -2031,8 +1179,7 @@ static char *te_skip_to(FrtTermEnum *te, const char *term)
2031
1179
  * SegmentTermIndex
2032
1180
  ****************************************************************************/
2033
1181
 
2034
- static void sti_destroy(FrtSegmentTermIndex *sti)
2035
- {
1182
+ static void sti_destroy(FrtSegmentTermIndex *sti) {
2036
1183
  if (sti->index_terms) {
2037
1184
  int i;
2038
1185
  const int sti_index_cnt = sti->index_cnt;
@@ -2076,8 +1223,7 @@ static void sti_ensure_index_is_read(FrtSegmentTermIndex *sti, FrtTermEnum *inde
2076
1223
  }
2077
1224
  }
2078
1225
 
2079
- static int sti_get_index_offset(FrtSegmentTermIndex *sti, const char *term)
2080
- {
1226
+ static int sti_get_index_offset(FrtSegmentTermIndex *sti, const char *term) {
2081
1227
  int lo = 0;
2082
1228
  int hi = sti->index_cnt - 1;
2083
1229
  int mid, delta;
@@ -2088,11 +1234,9 @@ static int sti_get_index_offset(FrtSegmentTermIndex *sti, const char *term)
2088
1234
  delta = strcmp(term, index_terms[mid]);
2089
1235
  if (delta < 0) {
2090
1236
  hi = mid - 1;
2091
- }
2092
- else if (delta > 0) {
1237
+ } else if (delta > 0) {
2093
1238
  lo = mid + 1;
2094
- }
2095
- else {
1239
+ } else {
2096
1240
  return mid;
2097
1241
  }
2098
1242
  }
@@ -2120,7 +1264,7 @@ FrtSegmentFieldIndex *frt_sfi_open(FrtStore *store, const char *segment) {
2120
1264
  pthread_mutex_init(&sfi->mutex, NULL);
2121
1265
 
2122
1266
  sprintf(file_name, "%s.tfx", segment);
2123
- is = store->open_input(store, file_name);
1267
+ is = store->open_input(store, segm_idx_name, file_name);
2124
1268
  field_count = (int)frt_is_read_u32(is);
2125
1269
  sfi->index_interval = frt_is_read_vint(is);
2126
1270
  sfi->skip_interval = frt_is_read_vint(is);
@@ -2139,7 +1283,7 @@ FrtSegmentFieldIndex *frt_sfi_open(FrtStore *store, const char *segment) {
2139
1283
  frt_is_close(is);
2140
1284
 
2141
1285
  sprintf(file_name, "%s.tix", segment);
2142
- is = store->open_input(store, file_name);
1286
+ is = store->open_input(store, segm_idx_name, file_name);
2143
1287
  FRT_DEREF(is);
2144
1288
  sfi->index_te = frt_ste_new(is, sfi);
2145
1289
  return sfi;
@@ -2269,13 +1413,11 @@ void frt_ste_close(FrtTermEnum *te) {
2269
1413
  free(te);
2270
1414
  }
2271
1415
 
2272
- static char *frt_ste_get_term(FrtTermEnum *te, int pos)
2273
- {
1416
+ static char *frt_ste_get_term(FrtTermEnum *te, int pos) {
2274
1417
  FrtSegmentTermEnum *ste = STE(te);
2275
1418
  if (pos >= ste->size) {
2276
1419
  return NULL;
2277
- }
2278
- else if (pos != ste->pos) {
1420
+ } else if (pos != ste->pos) {
2279
1421
  int idx_int = ste->sfi->index_interval;
2280
1422
  if ((pos < ste->pos) || pos > (1 + ste->pos / idx_int) * idx_int) {
2281
1423
  FrtSegmentTermIndex *sti = (FrtSegmentTermIndex *)frt_h_get_int(ste->sfi->field_dict, te->field_num);
@@ -2292,8 +1434,7 @@ static char *frt_ste_get_term(FrtTermEnum *te, int pos)
2292
1434
  return te->curr_term;
2293
1435
  }
2294
1436
 
2295
- FrtTermEnum *frt_ste_new(FrtInStream *is, FrtSegmentFieldIndex *sfi)
2296
- {
1437
+ FrtTermEnum *frt_ste_new(FrtInStream *is, FrtSegmentFieldIndex *sfi) {
2297
1438
  FrtSegmentTermEnum *ste = ste_allocate();
2298
1439
 
2299
1440
  TE(ste)->field_num = -1;
@@ -2313,8 +1454,7 @@ FrtTermEnum *frt_ste_new(FrtInStream *is, FrtSegmentFieldIndex *sfi)
2313
1454
 
2314
1455
  #define MTE(te) ((MultiTermEnum *)(te))
2315
1456
 
2316
- typedef struct TermEnumWrapper
2317
- {
1457
+ typedef struct TermEnumWrapper {
2318
1458
  int index;
2319
1459
  FrtTermEnum *te;
2320
1460
  int *doc_map;
@@ -2322,8 +1462,7 @@ typedef struct TermEnumWrapper
2322
1462
  char *term;
2323
1463
  } TermEnumWrapper;
2324
1464
 
2325
- typedef struct MultiTermEnum
2326
- {
1465
+ typedef struct MultiTermEnum {
2327
1466
  FrtTermEnum te;
2328
1467
  int doc_freq;
2329
1468
  FrtPriorityQueue *tew_queue;
@@ -2335,29 +1474,24 @@ typedef struct MultiTermEnum
2335
1474
  int *ti_indexes;
2336
1475
  } MultiTermEnum;
2337
1476
 
2338
- static bool tew_lt(const TermEnumWrapper *tew1, const TermEnumWrapper *tew2)
2339
- {
1477
+ static bool tew_lt(const TermEnumWrapper *tew1, const TermEnumWrapper *tew2) {
2340
1478
  int cmpres = strcmp(tew1->term, tew2->term);
2341
1479
  if (0 == cmpres) {
2342
1480
  return tew1->index < tew2->index;
2343
- }
2344
- else {
1481
+ } else {
2345
1482
  return cmpres < 0;
2346
1483
  }
2347
1484
  }
2348
1485
 
2349
- static char *tew_next(TermEnumWrapper *tew)
2350
- {
1486
+ static char *tew_next(TermEnumWrapper *tew) {
2351
1487
  return (tew->term = tew->te->next(tew->te));
2352
1488
  }
2353
1489
 
2354
- static char *tew_skip_to(TermEnumWrapper *tew, const char *term)
2355
- {
1490
+ static char *tew_skip_to(TermEnumWrapper *tew, const char *term) {
2356
1491
  return (tew->term = tew->te->skip_to(tew->te, term));
2357
1492
  }
2358
1493
 
2359
- static void tew_destroy(TermEnumWrapper *tew)
2360
- {
1494
+ static void tew_destroy(TermEnumWrapper *tew) {
2361
1495
  frt_ir_close(tew->ir);
2362
1496
  if (tew->doc_map) {
2363
1497
  free(tew->doc_map);
@@ -2424,8 +1558,7 @@ static FrtTermEnum *mte_set_field(FrtTermEnum *te, int field_num) {
2424
1558
  if (tew_next(tew)) {
2425
1559
  frt_pq_push(mte->tew_queue, tew); /* initialize queue */
2426
1560
  }
2427
- }
2428
- else {
1561
+ } else {
2429
1562
  sub_te->field_num = -1;
2430
1563
  }
2431
1564
 
@@ -2528,7 +1661,7 @@ FrtTermInfosReader *frt_tir_open(FrtStore *store, FrtSegmentFieldIndex *sfi, con
2528
1661
  char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
2529
1662
 
2530
1663
  sprintf(file_name, "%s.tis", segment);
2531
- FrtInStream *is = store->open_input(store, file_name);
1664
+ FrtInStream *is = store->open_input(store, segm_idx_name, file_name);
2532
1665
  FRT_DEREF(is);
2533
1666
  tir->orig_te = frt_ste_new(is, sfi);
2534
1667
  tir->thread_te = 0;
@@ -2607,7 +1740,7 @@ void frt_tir_close(FrtTermInfosReader *tir) {
2607
1740
 
2608
1741
  static FrtTermWriter *frt_tw_new(FrtStore *store, char *file_name) {
2609
1742
  FrtTermWriter *tw = FRT_ALLOC_AND_ZERO(FrtTermWriter);
2610
- tw->os = store->new_output(store, file_name);
1743
+ tw->os = store->new_output(store, segm_idx_name, file_name);
2611
1744
  tw->last_term = FRT_EMPTY_STRING;
2612
1745
  return tw;
2613
1746
  }
@@ -2634,7 +1767,7 @@ FrtTermInfosWriter *frt_tiw_open(FrtStore *store, const char *segment, int index
2634
1767
  strcpy(file_name + segment_len, ".tis");
2635
1768
  tiw->tis_writer = frt_tw_new(store, file_name);
2636
1769
  strcpy(file_name + segment_len, ".tfx");
2637
- tiw->tfx_out = store->new_output(store, file_name);
1770
+ tiw->tfx_out = store->new_output(store, segm_idx_name, file_name);
2638
1771
  frt_os_write_u32(tiw->tfx_out, 0); /* make space for field_count */
2639
1772
 
2640
1773
  /* The following two numbers are the first numbers written to the field
@@ -3002,35 +2135,30 @@ static bool stpe_next(FrtTermDocEnum *tde) {
3002
2135
  }
3003
2136
  }
3004
2137
 
3005
- static int stpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
3006
- {
2138
+ static int stpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num) {
3007
2139
  (void)tde; (void)docs; (void)freqs; (void)req_num;
3008
2140
  FRT_RAISE(FRT_ARG_ERROR, "TermPosEnum does not handle processing multiple documents"
3009
2141
  " in one call. Use TermDocEnum instead.");
3010
2142
  return -1;
3011
2143
  }
3012
2144
 
3013
- static int stpe_next_position(FrtTermDocEnum *tde)
3014
- {
2145
+ static int stpe_next_position(FrtTermDocEnum *tde) {
3015
2146
  FrtSegmentTermDocEnum *stde = STDE(tde);
3016
2147
  return (stde->prx_cnt-- > 0) ? stde->position += frt_is_read_vint(stde->prx_in)
3017
2148
  : -1;
3018
2149
  }
3019
2150
 
3020
- static void stpe_close(FrtTermDocEnum *tde)
3021
- {
2151
+ static void stpe_close(FrtTermDocEnum *tde) {
3022
2152
  frt_is_close(STDE(tde)->prx_in);
3023
2153
  STDE(tde)->prx_in = NULL;
3024
2154
  stde_close(tde);
3025
2155
  }
3026
2156
 
3027
- static void stpe_skip_prox(FrtSegmentTermDocEnum *stde)
3028
- {
2157
+ static void stpe_skip_prox(FrtSegmentTermDocEnum *stde) {
3029
2158
  frt_is_skip_vints(stde->prx_in, stde->freq);
3030
2159
  }
3031
2160
 
3032
- static void stpe_seek_prox(FrtSegmentTermDocEnum *stde, frt_off_t prx_ptr)
3033
- {
2161
+ static void stpe_seek_prox(FrtSegmentTermDocEnum *stde, frt_off_t prx_ptr) {
3034
2162
  frt_is_seek(stde->prx_in, prx_ptr);
3035
2163
  stde->prx_cnt = 0;
3036
2164
  }
@@ -3039,8 +2167,7 @@ FrtTermDocEnum *frt_stpe_new(FrtTermInfosReader *tir,
3039
2167
  FrtInStream *frq_in,
3040
2168
  FrtInStream *prx_in,
3041
2169
  FrtBitVector *del_docs,
3042
- int skip_interval)
3043
- {
2170
+ int skip_interval) {
3044
2171
  FrtTermDocEnum *tde = frt_stde_new(tir, frq_in, del_docs, skip_interval);
3045
2172
  FrtSegmentTermDocEnum *stde = STDE(tde);
3046
2173
 
@@ -3069,8 +2196,7 @@ FrtTermDocEnum *frt_stpe_new(FrtTermInfosReader *tir,
3069
2196
 
3070
2197
  #define MTDE(tde) ((MultiTermDocEnum *)(tde))
3071
2198
 
3072
- typedef struct MultiTermDocEnum
3073
- {
2199
+ typedef struct MultiTermDocEnum {
3074
2200
  FrtTermDocEnum tde;
3075
2201
  int *starts;
3076
2202
  int base;
@@ -3083,16 +2209,14 @@ typedef struct MultiTermDocEnum
3083
2209
  FrtTermDocEnum *curr_tde;
3084
2210
  } MultiTermDocEnum;
3085
2211
 
3086
- static FrtTermDocEnum *mtde_next_tde(MultiTermDocEnum *mtde)
3087
- {
2212
+ static FrtTermDocEnum *mtde_next_tde(MultiTermDocEnum *mtde) {
3088
2213
  mtde->ptr++;
3089
2214
  while (mtde->ptr < mtde->ir_cnt && !mtde->state[mtde->ptr]) {
3090
2215
  mtde->ptr++;
3091
2216
  }
3092
2217
  if (mtde->ptr >= mtde->ir_cnt) {
3093
2218
  return mtde->curr_tde = NULL;
3094
- }
3095
- else {
2219
+ } else {
3096
2220
  FrtTermDocEnum *tde = mtde->irs_tde[mtde->ptr];
3097
2221
  mtde->base = mtde->starts[mtde->ptr];
3098
2222
  return mtde->curr_tde = tde;
@@ -3106,8 +2230,7 @@ static FrtTermDocEnum *mtde_next_tde(MultiTermDocEnum *mtde)
3106
2230
  }\
3107
2231
  } while (0)
3108
2232
 
3109
- static void mtde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te)
3110
- {
2233
+ static void mtde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te) {
3111
2234
  int i;
3112
2235
  MultiTermDocEnum *mtde = MTDE(tde);
3113
2236
  memset(mtde->state, 0, mtde->ir_cnt);
@@ -3117,11 +2240,9 @@ static void mtde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te)
3117
2240
  mtde->state[index] = 1;
3118
2241
  if (tde->close == stde_close) {
3119
2242
  stde_seek_ti(STDE(tde), MTE(te)->tis + i);
3120
- }
3121
- else if (tde->close == stpe_close) {
2243
+ } else if (tde->close == stpe_close) {
3122
2244
  stpe_seek_ti(STDE(tde), MTE(te)->tis + i);
3123
- }
3124
- else {
2245
+ } else {
3125
2246
  tde->seek(tde, MTE(te)->tews[index].te->field_num, te->curr_term);
3126
2247
  }
3127
2248
  }
@@ -3130,48 +2251,40 @@ static void mtde_seek_te(FrtTermDocEnum *tde, FrtTermEnum *te)
3130
2251
  mtde_next_tde(mtde);
3131
2252
  }
3132
2253
 
3133
- static void mtde_seek(FrtTermDocEnum *tde, int field_num, const char *term)
3134
- {
2254
+ static void mtde_seek(FrtTermDocEnum *tde, int field_num, const char *term) {
3135
2255
  MultiTermDocEnum *mtde = MTDE(tde);
3136
2256
  FrtTermEnum *te = mtde->te;
3137
2257
  char *t;
3138
2258
  te->set_field(te, field_num);
3139
2259
  if (NULL != (t = te->skip_to(te, term)) && 0 == strcmp(term, t)) {
3140
2260
  mtde_seek_te(tde, te);
3141
- }
3142
- else {
2261
+ } else {
3143
2262
  memset(mtde->state, 0, mtde->ir_cnt);
3144
2263
  }
3145
2264
  }
3146
2265
 
3147
- static int mtde_doc_num(FrtTermDocEnum *tde)
3148
- {
2266
+ static int mtde_doc_num(FrtTermDocEnum *tde) {
3149
2267
  CHECK_CURR_TDE("doc_num");
3150
2268
  return MTDE(tde)->base + MTDE(tde)->curr_tde->doc_num(MTDE(tde)->curr_tde);
3151
2269
  }
3152
2270
 
3153
- static int mtde_freq(FrtTermDocEnum *tde)
3154
- {
2271
+ static int mtde_freq(FrtTermDocEnum *tde) {
3155
2272
  CHECK_CURR_TDE("freq");
3156
2273
  return MTDE(tde)->curr_tde->freq(MTDE(tde)->curr_tde);
3157
2274
  }
3158
2275
 
3159
- static bool mtde_next(FrtTermDocEnum *tde)
3160
- {
2276
+ static bool mtde_next(FrtTermDocEnum *tde) {
3161
2277
  MultiTermDocEnum *mtde = MTDE(tde);
3162
2278
  if (NULL != mtde->curr_tde && mtde->curr_tde->next(mtde->curr_tde)) {
3163
2279
  return true;
3164
- }
3165
- else if (mtde_next_tde(mtde)) {
2280
+ } else if (mtde_next_tde(mtde)) {
3166
2281
  return mtde_next(tde);
3167
- }
3168
- else {
2282
+ } else {
3169
2283
  return false;
3170
2284
  }
3171
2285
  }
3172
2286
 
3173
- static int mtde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
3174
- {
2287
+ static int mtde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num) {
3175
2288
  int i, end = 0, last_end = 0, b;
3176
2289
  MultiTermDocEnum *mtde = MTDE(tde);
3177
2290
  while (true) {
@@ -3180,24 +2293,21 @@ static int mtde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
3180
2293
  freqs + last_end, req_num - last_end);
3181
2294
  if (end == last_end) { /* none left in segment */
3182
2295
  if (!mtde_next_tde(mtde)) return end;
3183
- }
3184
- else { /* got some */
2296
+ } else { /* got some */
3185
2297
  b = mtde->base; /* adjust doc numbers */
3186
2298
  for (i = last_end; i < end; i++) {
3187
2299
  docs[i] += b;
3188
2300
  }
3189
2301
  if (end == req_num) {
3190
2302
  return end;
3191
- }
3192
- else {
2303
+ } else {
3193
2304
  last_end = end;
3194
2305
  }
3195
2306
  }
3196
2307
  }
3197
2308
  }
3198
2309
 
3199
- static bool mtde_skip_to(FrtTermDocEnum *tde, int target_doc_num)
3200
- {
2310
+ static bool mtde_skip_to(FrtTermDocEnum *tde, int target_doc_num) {
3201
2311
  MultiTermDocEnum *mtde = MTDE(tde);
3202
2312
  FrtTermDocEnum *curr_tde;
3203
2313
  while (NULL != (curr_tde = mtde->curr_tde)) {
@@ -3211,8 +2321,7 @@ static bool mtde_skip_to(FrtTermDocEnum *tde, int target_doc_num)
3211
2321
  return false;
3212
2322
  }
3213
2323
 
3214
- static void mtde_close(FrtTermDocEnum *tde)
3215
- {
2324
+ static void mtde_close(FrtTermDocEnum *tde) {
3216
2325
  MultiTermDocEnum *mtde = MTDE(tde);
3217
2326
  FrtTermDocEnum *tmp_tde;
3218
2327
  int i = mtde->ir_cnt;
@@ -3227,8 +2336,7 @@ static void mtde_close(FrtTermDocEnum *tde)
3227
2336
  free(tde);
3228
2337
  }
3229
2338
 
3230
- static FrtTermDocEnum *mtxe_new(FrtMultiReader *mr)
3231
- {
2339
+ static FrtTermDocEnum *mtxe_new(FrtMultiReader *mr) {
3232
2340
  MultiTermDocEnum *mtde = FRT_ALLOC_AND_ZERO(MultiTermDocEnum);
3233
2341
  FrtTermDocEnum *tde = TDE(mtde);
3234
2342
  tde->seek = &mtde_seek;
@@ -3250,8 +2358,7 @@ static FrtTermDocEnum *mtxe_new(FrtMultiReader *mr)
3250
2358
  return tde;
3251
2359
  }
3252
2360
 
3253
- static FrtTermDocEnum *mtde_new(FrtMultiReader *mr)
3254
- {
2361
+ static FrtTermDocEnum *mtde_new(FrtMultiReader *mr) {
3255
2362
  int i;
3256
2363
  FrtTermDocEnum *tde = mtxe_new(mr);
3257
2364
  tde->next_position = NULL;
@@ -3266,14 +2373,12 @@ static FrtTermDocEnum *mtde_new(FrtMultiReader *mr)
3266
2373
  * MultiTermPosEnum
3267
2374
  ****************************************************************************/
3268
2375
 
3269
- static int mtpe_next_position(FrtTermDocEnum *tde)
3270
- {
2376
+ static int mtpe_next_position(FrtTermDocEnum *tde) {
3271
2377
  CHECK_CURR_TDE("next_position");
3272
2378
  return MTDE(tde)->curr_tde->next_position(MTDE(tde)->curr_tde);
3273
2379
  }
3274
2380
 
3275
- static FrtTermDocEnum *mtpe_new(FrtMultiReader *mr)
3276
- {
2381
+ static FrtTermDocEnum *mtpe_new(FrtMultiReader *mr) {
3277
2382
  int i;
3278
2383
  FrtTermDocEnum *tde = mtxe_new(mr);
3279
2384
  tde->next_position = &mtpe_next_position;
@@ -3293,8 +2398,7 @@ static FrtTermDocEnum *mtpe_new(FrtMultiReader *mr)
3293
2398
  #define MTDPE(tde) ((MultipleTermDocPosEnum *)(tde))
3294
2399
  #define MTDPE_POS_QUEUE_INIT_CAPA 8
3295
2400
 
3296
- typedef struct
3297
- {
2401
+ typedef struct {
3298
2402
  FrtTermDocEnum tde;
3299
2403
  int doc_num;
3300
2404
  int freq;
@@ -3309,8 +2413,7 @@ static void tde_destroy(FrtTermDocEnum *tde) {
3309
2413
  tde->close(tde);
3310
2414
  }
3311
2415
 
3312
- static void mtdpe_seek(FrtTermDocEnum *tde, int field_num, const char *term)
3313
- {
2416
+ static void mtdpe_seek(FrtTermDocEnum *tde, int field_num, const char *term) {
3314
2417
  (void)tde;
3315
2418
  (void)field_num;
3316
2419
  (void)term;
@@ -3318,18 +2421,15 @@ static void mtdpe_seek(FrtTermDocEnum *tde, int field_num, const char *term)
3318
2421
  " the #seek operation");
3319
2422
  }
3320
2423
 
3321
- static int mtdpe_doc_num(FrtTermDocEnum *tde)
3322
- {
2424
+ static int mtdpe_doc_num(FrtTermDocEnum *tde) {
3323
2425
  return MTDPE(tde)->doc_num;
3324
2426
  }
3325
2427
 
3326
- static int mtdpe_freq(FrtTermDocEnum *tde)
3327
- {
2428
+ static int mtdpe_freq(FrtTermDocEnum *tde) {
3328
2429
  return MTDPE(tde)->freq;
3329
2430
  }
3330
2431
 
3331
- static bool mtdpe_next(FrtTermDocEnum *tde)
3332
- {
2432
+ static bool mtdpe_next(FrtTermDocEnum *tde) {
3333
2433
  FrtTermDocEnum *sub_tde;
3334
2434
  int pos = 0, freq = 0;
3335
2435
  int doc;
@@ -3358,8 +2458,7 @@ static bool mtdpe_next(FrtTermDocEnum *tde)
3358
2458
 
3359
2459
  if (sub_tde->next(sub_tde)) {
3360
2460
  frt_pq_down(mtdpe->pq);
3361
- }
3362
- else {
2461
+ } else {
3363
2462
  sub_tde = (FrtTermDocEnum *)frt_pq_pop(mtdpe->pq);
3364
2463
  sub_tde->close(sub_tde);
3365
2464
  }
@@ -3375,13 +2474,11 @@ static bool mtdpe_next(FrtTermDocEnum *tde)
3375
2474
  return true;
3376
2475
  }
3377
2476
 
3378
- static bool tdpe_less_than(FrtTermDocEnum *p1, FrtTermDocEnum *p2)
3379
- {
2477
+ static bool tdpe_less_than(FrtTermDocEnum *p1, FrtTermDocEnum *p2) {
3380
2478
  return p1->doc_num(p1) < p2->doc_num(p2);
3381
2479
  }
3382
2480
 
3383
- static bool mtdpe_skip_to(FrtTermDocEnum *tde, int target_doc_num)
3384
- {
2481
+ static bool mtdpe_skip_to(FrtTermDocEnum *tde, int target_doc_num) {
3385
2482
  FrtTermDocEnum *sub_tde;
3386
2483
  FrtPriorityQueue *mtdpe_pq = MTDPE(tde)->pq;
3387
2484
 
@@ -3389,8 +2486,7 @@ static bool mtdpe_skip_to(FrtTermDocEnum *tde, int target_doc_num)
3389
2486
  && (target_doc_num > sub_tde->doc_num(sub_tde))) {
3390
2487
  if (sub_tde->skip_to(sub_tde, target_doc_num)) {
3391
2488
  frt_pq_down(mtdpe_pq);
3392
- }
3393
- else {
2489
+ } else {
3394
2490
  sub_tde = (FrtTermDocEnum *)frt_pq_pop(mtdpe_pq);
3395
2491
  sub_tde->close(sub_tde);
3396
2492
  }
@@ -3398,8 +2494,7 @@ static bool mtdpe_skip_to(FrtTermDocEnum *tde, int target_doc_num)
3398
2494
  return tde->next(tde);
3399
2495
  }
3400
2496
 
3401
- static int mtdpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
3402
- {
2497
+ static int mtdpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num) {
3403
2498
  (void)tde;
3404
2499
  (void)docs;
3405
2500
  (void)freqs;
@@ -3408,21 +2503,18 @@ static int mtdpe_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num)
3408
2503
  return req_num;
3409
2504
  }
3410
2505
 
3411
- static int mtdpe_next_position(FrtTermDocEnum *tde)
3412
- {
2506
+ static int mtdpe_next_position(FrtTermDocEnum *tde) {
3413
2507
  return MTDPE(tde)->pos_queue[MTDPE(tde)->pos_queue_index++];
3414
2508
  }
3415
2509
 
3416
- static void mtdpe_close(FrtTermDocEnum *tde)
3417
- {
2510
+ static void mtdpe_close(FrtTermDocEnum *tde) {
3418
2511
  frt_pq_clear(MTDPE(tde)->pq);
3419
2512
  frt_pq_destroy(MTDPE(tde)->pq);
3420
2513
  free(MTDPE(tde)->pos_queue);
3421
2514
  free(tde);
3422
2515
  }
3423
2516
 
3424
- FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms, int t_cnt)
3425
- {
2517
+ FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms, int t_cnt) {
3426
2518
  int i;
3427
2519
  MultipleTermDocPosEnum *mtdpe = FRT_ALLOC_AND_ZERO(MultipleTermDocPosEnum);
3428
2520
  FrtTermDocEnum *tde = TDE(mtdpe);
@@ -3437,8 +2529,7 @@ FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms, i
3437
2529
  tpe->seek(tpe, field_num, terms[i]);
3438
2530
  if (tpe->next(tpe)) {
3439
2531
  frt_pq_push(pq, tpe);
3440
- }
3441
- else {
2532
+ } else {
3442
2533
  tpe->close(tpe);
3443
2534
  }
3444
2535
  }
@@ -3471,26 +2562,22 @@ static void file_name_filter_init(void) {
3471
2562
  frt_register_for_cleanup(fn_extensions, (frt_free_ft)&frt_h_destroy);
3472
2563
  }
3473
2564
 
3474
- bool frt_file_name_filter_is_index_file(const char *file_name, bool include_locks)
3475
- {
2565
+ bool frt_file_name_filter_is_index_file(const char *file_name, bool include_locks) {
3476
2566
  char *p = strrchr(file_name, '.');
3477
2567
  if (NULL == fn_extensions) file_name_filter_init();
3478
2568
  if (NULL != p) {
3479
2569
  char *extension = p + 1;
3480
2570
  if (NULL != frt_h_get(fn_extensions, extension)) {
3481
2571
  return true;
3482
- }
3483
- else if ((*extension == 'f' || *extension == 's')
2572
+ } else if ((*extension == 'f' || *extension == 's')
3484
2573
  && *(extension + 1) >= '0'
3485
2574
  && *(extension + 1) <= '9') {
3486
2575
  return true;
3487
- }
3488
- else if (include_locks && (strcmp(extension, "lck") == 0)
2576
+ } else if (include_locks && (strcmp(extension, "lck") == 0)
3489
2577
  && (strncmp(file_name, "ferret", 6) == 0)) {
3490
2578
  return true;
3491
2579
  }
3492
- }
3493
- else if (0 == strncmp(FRT_SEGMENTS_FILE_NAME, file_name,
2580
+ } else if (0 == strncmp(FRT_SEGMENTS_FILE_NAME, file_name,
3494
2581
  sizeof(FRT_SEGMENTS_FILE_NAME) - 1)) {
3495
2582
  return true;
3496
2583
  }
@@ -3526,8 +2613,8 @@ void frt_deleter_queue_file(FrtDeleter *dlr, const char *file_name) {
3526
2613
  void frt_deleter_delete_file(FrtDeleter *dlr, char *file_name) {
3527
2614
  FrtStore *store = dlr->store;
3528
2615
  FRT_TRY
3529
- if (store->exists(store, file_name)) {
3530
- store->remove(store, file_name);
2616
+ if (store->exists(store, segm_idx_name, file_name)) {
2617
+ store->remove(store, segm_idx_name, file_name);
3531
2618
  }
3532
2619
  frt_hs_del(dlr->pending, file_name);
3533
2620
  FRT_XCATCHALL
@@ -3642,20 +2729,18 @@ void frt_deleter_find_deletable_files(FrtDeleter *dlr) {
3642
2729
  * info: */
3643
2730
  frt_sis_curr_seg_file_name(dfa.curr_seg_file_name, store);
3644
2731
 
3645
- store->each(store, &frt_deleter_find_deletable_files_i, &dfa);
2732
+ store->each(store, segm_idx_name, &frt_deleter_find_deletable_files_i, &dfa);
3646
2733
  frt_h_destroy(dfa.current);
3647
2734
  }
3648
2735
 
3649
- static void deleter_delete_deletable_files(FrtDeleter *dlr)
3650
- {
2736
+ static void deleter_delete_deletable_files(FrtDeleter *dlr) {
3651
2737
  frt_deleter_find_deletable_files(dlr);
3652
2738
  deleter_commit_pending_deletions(dlr);
3653
2739
  }
3654
2740
 
3655
2741
  /*
3656
2742
  TODO: currently not used. Why not?
3657
- static void deleter_clear_pending_deletions(FrtDeleter *dlr)
3658
- {
2743
+ static void deleter_clear_pending_deletions(FrtDeleter *dlr) {
3659
2744
  frt_hs_clear(dlr->pending);
3660
2745
  }
3661
2746
  */
@@ -3666,14 +2751,12 @@ static void deleter_clear_pending_deletions(FrtDeleter *dlr)
3666
2751
  *
3667
2752
  ****************************************************************************/
3668
2753
 
3669
- static void ir_acquire_not_necessary(FrtIndexReader *ir)
3670
- {
2754
+ static void ir_acquire_not_necessary(FrtIndexReader *ir) {
3671
2755
  (void)ir;
3672
2756
  }
3673
2757
 
3674
2758
  #define I64_PFX POSH_I64_PRINTF_PREFIX
3675
- static void ir_acquire_write_lock(FrtIndexReader *ir)
3676
- {
2759
+ static void ir_acquire_write_lock(FrtIndexReader *ir) {
3677
2760
  if (ir->is_stale) {
3678
2761
  FRT_RAISE(FRT_STATE_ERROR, "IndexReader out of date and no longer valid for "
3679
2762
  "delete, undelete, or set_norm operations. To "
@@ -3682,7 +2765,7 @@ static void ir_acquire_write_lock(FrtIndexReader *ir)
3682
2765
  }
3683
2766
 
3684
2767
  if (NULL == ir->write_lock) {
3685
- ir->write_lock = frt_open_lock(ir->store, FRT_WRITE_LOCK_NAME);
2768
+ ir->write_lock = frt_open_lock(ir->store, segm_idx_name, FRT_WRITE_LOCK_NAME);
3686
2769
  if (!ir->write_lock->obtain(ir->write_lock)) {/* obtain write lock */
3687
2770
  FRT_RAISE(FRT_LOCK_ERROR, "Could not obtain write lock when trying to "
3688
2771
  "write changes to the index. Check that there "
@@ -3728,8 +2811,7 @@ static FrtIndexReader *ir_setup(FrtIndexReader *ir, FrtStore *store, FrtSegmentI
3728
2811
  ir->is_owner = is_owner;
3729
2812
  if (is_owner) {
3730
2813
  ir->acquire_write_lock = &ir_acquire_write_lock;
3731
- }
3732
- else {
2814
+ } else {
3733
2815
  ir->acquire_write_lock = &ir_acquire_not_necessary;
3734
2816
  }
3735
2817
 
@@ -3740,8 +2822,7 @@ int frt_ir_doc_freq(FrtIndexReader *ir, ID field, const char *term) {
3740
2822
  int field_num = frt_fis_get_field_num(ir->fis, field);
3741
2823
  if (field_num >= 0) {
3742
2824
  return ir->doc_freq(ir, field_num, term);
3743
- }
3744
- else {
2825
+ } else {
3745
2826
  return 0;
3746
2827
  }
3747
2828
  }
@@ -3761,8 +2842,7 @@ void frt_ir_set_norm(FrtIndexReader *ir, int doc_num, ID field, frt_uchar val) {
3761
2842
  }
3762
2843
  }
3763
2844
 
3764
- frt_uchar *frt_ir_get_norms_i(FrtIndexReader *ir, int field_num)
3765
- {
2845
+ frt_uchar *frt_ir_get_norms_i(FrtIndexReader *ir, int field_num) {
3766
2846
  frt_uchar *norms = NULL;
3767
2847
  if (field_num >= 0) {
3768
2848
  norms = ir->get_norms(ir, field_num);
@@ -3785,15 +2865,13 @@ frt_uchar *frt_ir_get_norms_into(FrtIndexReader *ir, ID field, frt_uchar *buf) {
3785
2865
  int field_num = frt_fis_get_field_num(ir->fis, field);
3786
2866
  if (field_num >= 0) {
3787
2867
  ir->get_norms_into(ir, field_num, buf);
3788
- }
3789
- else {
2868
+ } else {
3790
2869
  memset(buf, 0, ir->max_doc(ir));
3791
2870
  }
3792
2871
  return buf;
3793
2872
  }
3794
2873
 
3795
- void frt_ir_undelete_all(FrtIndexReader *ir)
3796
- {
2874
+ void frt_ir_undelete_all(FrtIndexReader *ir) {
3797
2875
  pthread_mutex_lock(&ir->mutex);
3798
2876
  ir->acquire_write_lock(ir);
3799
2877
  ir->undelete_all_i(ir);
@@ -3801,8 +2879,7 @@ void frt_ir_undelete_all(FrtIndexReader *ir)
3801
2879
  pthread_mutex_unlock(&ir->mutex);
3802
2880
  }
3803
2881
 
3804
- void frt_ir_delete_doc(FrtIndexReader *ir, int doc_num)
3805
- {
2882
+ void frt_ir_delete_doc(FrtIndexReader *ir, int doc_num) {
3806
2883
  if (doc_num >= 0 && doc_num < ir->max_doc(ir)) {
3807
2884
  pthread_mutex_lock(&ir->mutex);
3808
2885
  ir->acquire_write_lock(ir);
@@ -3861,8 +2938,7 @@ FrtTermDocEnum *frt_ir_term_positions_for(FrtIndexReader *ir, ID field, const ch
3861
2938
  return tde;
3862
2939
  }
3863
2940
 
3864
- static void ir_commit_i(FrtIndexReader *ir)
3865
- {
2941
+ static void ir_commit_i(FrtIndexReader *ir) {
3866
2942
  if (ir->has_changes) {
3867
2943
  if (NULL == ir->deleter && NULL != ir->store) {
3868
2944
  /* In the MultiReader case, we share this deleter across all
@@ -3888,16 +2964,14 @@ static void ir_commit_i(FrtIndexReader *ir)
3888
2964
  frt_close_lock(ir->write_lock);
3889
2965
  ir->write_lock = NULL;
3890
2966
  }
3891
- }
3892
- else {
2967
+ } else {
3893
2968
  ir->commit_i(ir);
3894
2969
  }
3895
2970
  }
3896
2971
  ir->has_changes = false;
3897
2972
  }
3898
2973
 
3899
- void frt_ir_commit(FrtIndexReader *ir)
3900
- {
2974
+ void frt_ir_commit(FrtIndexReader *ir) {
3901
2975
  pthread_mutex_lock(&ir->mutex);
3902
2976
  ir_commit_i(ir);
3903
2977
  pthread_mutex_unlock(&ir->mutex);
@@ -3930,15 +3004,13 @@ void frt_ir_close(FrtIndexReader *ir) {
3930
3004
  /**
3931
3005
  * Don't call this method if the cache already exists
3932
3006
  **/
3933
- void frt_ir_add_cache(FrtIndexReader *ir)
3934
- {
3007
+ void frt_ir_add_cache(FrtIndexReader *ir) {
3935
3008
  if (NULL == ir->cache) {
3936
3009
  ir->cache = frt_co_hash_create();
3937
3010
  }
3938
3011
  }
3939
3012
 
3940
- bool frt_ir_is_latest(FrtIndexReader *ir)
3941
- {
3013
+ bool frt_ir_is_latest(FrtIndexReader *ir) {
3942
3014
  return ir->is_latest_i(ir);
3943
3015
  }
3944
3016
 
@@ -3953,8 +3025,7 @@ typedef struct Norm {
3953
3025
  bool is_dirty : 1;
3954
3026
  } Norm;
3955
3027
 
3956
- static Norm *norm_create(FrtInStream *is, int field_num)
3957
- {
3028
+ static Norm *norm_create(FrtInStream *is, int field_num) {
3958
3029
  Norm *norm = FRT_ALLOC(Norm);
3959
3030
 
3960
3031
  norm->is = is;
@@ -3966,8 +3037,7 @@ static Norm *norm_create(FrtInStream *is, int field_num)
3966
3037
  return norm;
3967
3038
  }
3968
3039
 
3969
- static void norm_destroy(Norm *norm)
3970
- {
3040
+ static void norm_destroy(Norm *norm) {
3971
3041
  frt_is_close(norm->is);
3972
3042
  if (NULL != norm->bytes) {
3973
3043
  free(norm->bytes);
@@ -3976,8 +3046,7 @@ static void norm_destroy(Norm *norm)
3976
3046
  }
3977
3047
 
3978
3048
  static void norm_rewrite(Norm *norm, FrtStore *store, FrtDeleter *dlr,
3979
- FrtSegmentInfo *si, int doc_count)
3980
- {
3049
+ FrtSegmentInfo *si, int doc_count) {
3981
3050
  FrtOutStream *os;
3982
3051
  char norm_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
3983
3052
  const int field_num = norm->field_num;
@@ -3987,7 +3056,7 @@ static void norm_rewrite(Norm *norm, FrtStore *store, FrtDeleter *dlr,
3987
3056
  }
3988
3057
  frt_si_advance_norm_gen(si, field_num);
3989
3058
  si_norm_file_name(si, norm_file_name, field_num);
3990
- os = store->new_output(store, norm_file_name);
3059
+ os = store->new_output(store, segm_idx_name, norm_file_name);
3991
3060
  frt_os_write_bytes(os, norm->bytes, doc_count);
3992
3061
  frt_os_close(os);
3993
3062
  norm->is_dirty = false;
@@ -4011,8 +3080,7 @@ static FrtFieldsReader *sr_fr(FrtSegmentReader *sr) {
4011
3080
  return fr;
4012
3081
  }
4013
3082
 
4014
- static bool sr_is_deleted_i(FrtSegmentReader *sr, int doc_num)
4015
- {
3083
+ static bool sr_is_deleted_i(FrtSegmentReader *sr, int doc_num) {
4016
3084
  return (NULL != sr->deleted_docs && frt_bv_get(sr->deleted_docs, doc_num));
4017
3085
  }
4018
3086
 
@@ -4080,10 +3148,9 @@ static void sr_set_deleter_i(FrtIndexReader *ir, FrtDeleter *deleter) {
4080
3148
  ir->deleter = deleter;
4081
3149
  }
4082
3150
 
4083
- static void bv_write(FrtBitVector *bv, FrtStore *store, char *name)
4084
- {
3151
+ static void bv_write(FrtBitVector *bv, FrtStore *store, char *name) {
4085
3152
  int i;
4086
- FrtOutStream *os = store->new_output(store, name);
3153
+ FrtOutStream *os = store->new_output(store, segm_idx_name, name);
4087
3154
  frt_os_write_vint(os, bv->size);
4088
3155
  for (i = ((bv->size-1) >> 5); i >= 0; i--) {
4089
3156
  frt_os_write_u32(os, bv->bits[i]);
@@ -4091,11 +3158,10 @@ static void bv_write(FrtBitVector *bv, FrtStore *store, char *name)
4091
3158
  frt_os_close(os);
4092
3159
  }
4093
3160
 
4094
- static FrtBitVector *bv_read(FrtStore *store, char *name)
4095
- {
3161
+ static FrtBitVector *bv_read(FrtStore *store, char *name) {
4096
3162
  int i;
4097
3163
  volatile bool success = false;
4098
- FrtInStream *volatile is = store->open_input(store, name);
3164
+ FrtInStream *volatile is = store->open_input(store, segm_idx_name, name);
4099
3165
  FrtBitVector *volatile bv = FRT_ALLOC_AND_ZERO(FrtBitVector);
4100
3166
  bv->size = (int)frt_is_read_vint(is);
4101
3167
  bv->capa = (bv->size >> 5) + 1;
@@ -4114,13 +3180,11 @@ static FrtBitVector *bv_read(FrtStore *store, char *name)
4114
3180
  return bv;
4115
3181
  }
4116
3182
 
4117
- static bool sr_is_latest_i(FrtIndexReader *ir)
4118
- {
3183
+ static bool sr_is_latest_i(FrtIndexReader *ir) {
4119
3184
  return (frt_sis_read_current_version(ir->store) == ir->sis->version);
4120
3185
  }
4121
3186
 
4122
- static void sr_commit_i(FrtIndexReader *ir)
4123
- {
3187
+ static void sr_commit_i(FrtIndexReader *ir) {
4124
3188
  FrtSegmentInfo *si = SR(ir)->si;
4125
3189
  char *segment = SR(ir)->si->name;
4126
3190
  char tmp_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
@@ -4133,8 +3197,7 @@ static void sr_commit_i(FrtIndexReader *ir)
4133
3197
  if (SR(ir)->undelete_all) {
4134
3198
  si->del_gen = -1;
4135
3199
  SR(ir)->undelete_all = false;
4136
- }
4137
- else {
3200
+ } else {
4138
3201
  /* (SR(ir)->deleted_docs_dirty) re-write deleted */
4139
3202
  si->del_gen++;
4140
3203
  frt_fn_for_generation(tmp_file_name, segment, "del", si->del_gen);
@@ -4177,8 +3240,7 @@ static void sr_close_i(FrtIndexReader *ir) {
4177
3240
  }
4178
3241
  }
4179
3242
 
4180
- static int sr_num_docs(FrtIndexReader *ir)
4181
- {
3243
+ static int sr_num_docs(FrtIndexReader *ir) {
4182
3244
  int num_docs;
4183
3245
 
4184
3246
  pthread_mutex_lock(&ir->mutex);
@@ -4190,13 +3252,11 @@ static int sr_num_docs(FrtIndexReader *ir)
4190
3252
  return num_docs;
4191
3253
  }
4192
3254
 
4193
- static int sr_max_doc(FrtIndexReader *ir)
4194
- {
3255
+ static int sr_max_doc(FrtIndexReader *ir) {
4195
3256
  return SR(ir)->fr->size;
4196
3257
  }
4197
3258
 
4198
- static FrtDocument *sr_get_doc(FrtIndexReader *ir, int doc_num)
4199
- {
3259
+ static FrtDocument *sr_get_doc(FrtIndexReader *ir, int doc_num) {
4200
3260
  FrtDocument *doc;
4201
3261
  pthread_mutex_lock(&ir->mutex);
4202
3262
  if (sr_is_deleted_i(SR(ir), doc_num)) {
@@ -4208,8 +3268,7 @@ static FrtDocument *sr_get_doc(FrtIndexReader *ir, int doc_num)
4208
3268
  return doc;
4209
3269
  }
4210
3270
 
4211
- static FrtLazyDoc *sr_get_lazy_doc(FrtIndexReader *ir, int doc_num)
4212
- {
3271
+ static FrtLazyDoc *sr_get_lazy_doc(FrtIndexReader *ir, int doc_num) {
4213
3272
  FrtLazyDoc *lazy_doc;
4214
3273
  pthread_mutex_lock(&ir->mutex);
4215
3274
  if (sr_is_deleted_i(SR(ir), doc_num)) {
@@ -4221,8 +3280,7 @@ static FrtLazyDoc *sr_get_lazy_doc(FrtIndexReader *ir, int doc_num)
4221
3280
  return lazy_doc;
4222
3281
  }
4223
3282
 
4224
- static frt_uchar *sr_get_norms(FrtIndexReader *ir, int field_num)
4225
- {
3283
+ static frt_uchar *sr_get_norms(FrtIndexReader *ir, int field_num) {
4226
3284
  frt_uchar *norms;
4227
3285
  pthread_mutex_lock(&ir->mutex);
4228
3286
  norms = sr_get_norms_i(SR(ir), field_num);
@@ -4231,23 +3289,20 @@ static frt_uchar *sr_get_norms(FrtIndexReader *ir, int field_num)
4231
3289
  }
4232
3290
 
4233
3291
  static frt_uchar *sr_get_norms_into(FrtIndexReader *ir, int field_num,
4234
- frt_uchar *buf)
4235
- {
3292
+ frt_uchar *buf) {
4236
3293
  pthread_mutex_lock(&ir->mutex);
4237
3294
  sr_get_norms_into_i(SR(ir), field_num, buf);
4238
3295
  pthread_mutex_unlock(&ir->mutex);
4239
3296
  return buf;
4240
3297
  }
4241
3298
 
4242
- static FrtTermEnum *sr_terms(FrtIndexReader *ir, int field_num)
4243
- {
3299
+ static FrtTermEnum *sr_terms(FrtIndexReader *ir, int field_num) {
4244
3300
  FrtTermEnum *te = SR(ir)->tir->orig_te;
4245
3301
  te = frt_ste_clone(te);
4246
3302
  return ste_set_field(te, field_num);
4247
3303
  }
4248
3304
 
4249
- static FrtTermEnum *sr_terms_from(FrtIndexReader *ir, int field_num, const char *term)
4250
- {
3305
+ static FrtTermEnum *sr_terms_from(FrtIndexReader *ir, int field_num, const char *term) {
4251
3306
  FrtTermEnum *te = SR(ir)->tir->orig_te;
4252
3307
  te = frt_ste_clone(te);
4253
3308
  ste_set_field(te, field_num);
@@ -4255,20 +3310,17 @@ static FrtTermEnum *sr_terms_from(FrtIndexReader *ir, int field_num, const char
4255
3310
  return te;
4256
3311
  }
4257
3312
 
4258
- static int sr_doc_freq(FrtIndexReader *ir, int field_num, const char *term)
4259
- {
3313
+ static int sr_doc_freq(FrtIndexReader *ir, int field_num, const char *term) {
4260
3314
  FrtTermInfo *ti = frt_tir_get_ti(frt_tir_set_field(SR(ir)->tir, field_num), term);
4261
3315
  return ti ? ti->doc_freq : 0;
4262
3316
  }
4263
3317
 
4264
- static FrtTermDocEnum *sr_term_docs(FrtIndexReader *ir)
4265
- {
3318
+ static FrtTermDocEnum *sr_term_docs(FrtIndexReader *ir) {
4266
3319
  return frt_stde_new(SR(ir)->tir, SR(ir)->frq_in, SR(ir)->deleted_docs,
4267
3320
  STE(SR(ir)->tir->orig_te)->skip_interval);
4268
3321
  }
4269
3322
 
4270
- static FrtTermDocEnum *sr_term_positions(FrtIndexReader *ir)
4271
- {
3323
+ static FrtTermDocEnum *sr_term_positions(FrtIndexReader *ir) {
4272
3324
  FrtSegmentReader *sr = SR(ir);
4273
3325
  return frt_stpe_new(sr->tir, sr->frq_in, sr->prx_in, sr->deleted_docs,
4274
3326
  STE(sr->tir->orig_te)->skip_interval);
@@ -4285,8 +3337,7 @@ static FrtTermVector *sr_term_vector(FrtIndexReader *ir, int doc_num, ID field)
4285
3337
  return frt_fr_get_field_tv(fr, doc_num, fi->number);
4286
3338
  }
4287
3339
 
4288
- static FrtHash *sr_term_vectors(FrtIndexReader *ir, int doc_num)
4289
- {
3340
+ static FrtHash *sr_term_vectors(FrtIndexReader *ir, int doc_num) {
4290
3341
  FrtFieldsReader *fr;
4291
3342
  if (!SR(ir)->fr || NULL == (fr = sr_fr(SR(ir)))) {
4292
3343
  return NULL;
@@ -4295,8 +3346,7 @@ static FrtHash *sr_term_vectors(FrtIndexReader *ir, int doc_num)
4295
3346
  return frt_fr_get_tv(fr, doc_num);
4296
3347
  }
4297
3348
 
4298
- static bool sr_is_deleted(FrtIndexReader *ir, int doc_num)
4299
- {
3349
+ static bool sr_is_deleted(FrtIndexReader *ir, int doc_num) {
4300
3350
  bool is_del;
4301
3351
 
4302
3352
  pthread_mutex_lock(&ir->mutex);
@@ -4306,13 +3356,11 @@ static bool sr_is_deleted(FrtIndexReader *ir, int doc_num)
4306
3356
  return is_del;
4307
3357
  }
4308
3358
 
4309
- static bool sr_has_deletions(FrtIndexReader *ir)
4310
- {
3359
+ static bool sr_has_deletions(FrtIndexReader *ir) {
4311
3360
  return NULL != SR(ir)->deleted_docs;
4312
3361
  }
4313
3362
 
4314
- static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store)
4315
- {
3363
+ static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store) {
4316
3364
  int i;
4317
3365
  FrtSegmentInfo *si = SR(ir)->si;
4318
3366
  char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
@@ -4320,7 +3368,7 @@ static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store)
4320
3368
  for (i = si->norm_gens_size - 1; i >= 0; i--) {
4321
3369
  FrtStore *store = ir->store;
4322
3370
  if (si_norm_file_name(si, file_name, i)) {
4323
- FrtInStream *is = store->open_input(store, file_name);
3371
+ FrtInStream *is = store->open_input(store, segm_idx_name, file_name);
4324
3372
  FRT_DEREF(is);
4325
3373
  frt_h_set_int(SR(ir)->norms, i, norm_create(is, i));
4326
3374
  }
@@ -4328,8 +3376,7 @@ static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store)
4328
3376
  SR(ir)->norms_dirty = false;
4329
3377
  }
4330
3378
 
4331
- static FrtIndexReader *sr_setup_i(FrtSegmentReader *sr)
4332
- {
3379
+ static FrtIndexReader *sr_setup_i(FrtSegmentReader *sr) {
4333
3380
  FrtStore *volatile store = sr->si->store;
4334
3381
  FrtIndexReader *ir = IR(sr);
4335
3382
  char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
@@ -4378,12 +3425,12 @@ static FrtIndexReader *sr_setup_i(FrtSegmentReader *sr)
4378
3425
  }
4379
3426
 
4380
3427
  sprintf(file_name, "%s.frq", sr_segment);
4381
- sr->frq_in = store->open_input(store, file_name);
3428
+ sr->frq_in = store->open_input(store, segm_idx_name, file_name);
4382
3429
  sprintf(file_name, "%s.prx", sr_segment);
4383
- sr->prx_in = store->open_input(store, file_name);
3430
+ sr->prx_in = store->open_input(store, segm_idx_name, file_name);
4384
3431
  sr->norms = frt_h_new_int((frt_free_ft)&norm_destroy);
4385
3432
  sr_open_norms(ir, store);
4386
- if (fis_has_vectors(ir->fis)) {
3433
+ if (frt_fis_has_vectors(ir->fis)) {
4387
3434
  frb_thread_key_create(&sr->thread_fr, NULL);
4388
3435
  sr->fr_bucket = frt_ary_new();
4389
3436
  }
@@ -4413,8 +3460,7 @@ static FrtIndexReader *sr_open(FrtSegmentInfos *sis, FrtFieldInfos *fis, int si_
4413
3460
 
4414
3461
  #define MR(ir) ((FrtMultiReader *)(ir))
4415
3462
 
4416
- static int mr_reader_index_i(FrtMultiReader *mr, int doc_num)
4417
- {
3463
+ static int mr_reader_index_i(FrtMultiReader *mr, int doc_num) {
4418
3464
  int lo = 0; /* search @starts array */
4419
3465
  int hi = mr->r_cnt - 1; /* for first element less */
4420
3466
  int mid;
@@ -4425,11 +3471,9 @@ static int mr_reader_index_i(FrtMultiReader *mr, int doc_num)
4425
3471
  mid_value = mr->starts[mid];
4426
3472
  if (doc_num < mid_value) {
4427
3473
  hi = mid - 1;
4428
- }
4429
- else if (doc_num > mid_value) {
3474
+ } else if (doc_num > mid_value) {
4430
3475
  lo = mid + 1;
4431
- }
4432
- else { /* found a match */
3476
+ } else { /* found a match */
4433
3477
  while ((mid+1 < mr->r_cnt) && (mr->starts[mid+1] == mid_value)) {
4434
3478
  mid += 1; /* scan to last match in case we have empty segments */
4435
3479
  }
@@ -4439,8 +3483,7 @@ static int mr_reader_index_i(FrtMultiReader *mr, int doc_num)
4439
3483
  return hi;
4440
3484
  }
4441
3485
 
4442
- static int mr_num_docs(FrtIndexReader *ir)
4443
- {
3486
+ static int mr_num_docs(FrtIndexReader *ir) {
4444
3487
  int i, num_docs;
4445
3488
  pthread_mutex_lock(&ir->mutex);
4446
3489
  if (MR(ir)->num_docs_cache == -1) {
@@ -4457,8 +3500,7 @@ static int mr_num_docs(FrtIndexReader *ir)
4457
3500
  return num_docs;
4458
3501
  }
4459
3502
 
4460
- static int mr_max_doc(FrtIndexReader *ir)
4461
- {
3503
+ static int mr_max_doc(FrtIndexReader *ir) {
4462
3504
  return MR(ir)->max_doc;
4463
3505
  }
4464
3506
 
@@ -4466,30 +3508,25 @@ static int mr_max_doc(FrtIndexReader *ir)
4466
3508
  int i = mr_reader_index_i(MR(ir), doc_num);\
4467
3509
  FrtIndexReader *reader = MR(ir)->sub_readers[i]
4468
3510
 
4469
- static FrtDocument *mr_get_doc(FrtIndexReader *ir, int doc_num)
4470
- {
3511
+ static FrtDocument *mr_get_doc(FrtIndexReader *ir, int doc_num) {
4471
3512
  GET_READER();
4472
3513
  return reader->get_doc(reader, doc_num - MR(ir)->starts[i]);
4473
3514
  }
4474
3515
 
4475
- static FrtLazyDoc *mr_get_lazy_doc(FrtIndexReader *ir, int doc_num)
4476
- {
3516
+ static FrtLazyDoc *mr_get_lazy_doc(FrtIndexReader *ir, int doc_num) {
4477
3517
  GET_READER();
4478
3518
  return reader->get_lazy_doc(reader, doc_num - MR(ir)->starts[i]);
4479
3519
  }
4480
3520
 
4481
- int frt_mr_get_field_num(FrtMultiReader *mr, int ir_num, int f_num)
4482
- {
3521
+ int frt_mr_get_field_num(FrtMultiReader *mr, int ir_num, int f_num) {
4483
3522
  if (mr->field_num_map) {
4484
3523
  return mr->field_num_map[ir_num][f_num];
4485
- }
4486
- else {
3524
+ } else {
4487
3525
  return f_num;
4488
3526
  }
4489
3527
  }
4490
3528
 
4491
- static frt_uchar *mr_get_norms(FrtIndexReader *ir, int field_num)
4492
- {
3529
+ static frt_uchar *mr_get_norms(FrtIndexReader *ir, int field_num) {
4493
3530
  frt_uchar *bytes;
4494
3531
 
4495
3532
  pthread_mutex_lock(&ir->mutex);
@@ -4514,16 +3551,14 @@ static frt_uchar *mr_get_norms(FrtIndexReader *ir, int field_num)
4514
3551
  return bytes;
4515
3552
  }
4516
3553
 
4517
- static frt_uchar *mr_get_norms_into(FrtIndexReader *ir, int field_num, frt_uchar *buf)
4518
- {
3554
+ static frt_uchar *mr_get_norms_into(FrtIndexReader *ir, int field_num, frt_uchar *buf) {
4519
3555
  frt_uchar *bytes;
4520
3556
 
4521
3557
  pthread_mutex_lock(&ir->mutex);
4522
3558
  bytes = (frt_uchar *)frt_h_get_int(MR(ir)->norms_cache, field_num);
4523
3559
  if (NULL != bytes) {
4524
3560
  memcpy(buf, bytes, MR(ir)->max_doc);
4525
- }
4526
- else {
3561
+ } else {
4527
3562
  int i;
4528
3563
  const int mr_reader_cnt = MR(ir)->r_cnt;
4529
3564
  for (i = 0; i < mr_reader_cnt; i++) {
@@ -4538,18 +3573,15 @@ static frt_uchar *mr_get_norms_into(FrtIndexReader *ir, int field_num, frt_uchar
4538
3573
  return buf;
4539
3574
  }
4540
3575
 
4541
- static FrtTermEnum *mr_terms(FrtIndexReader *ir, int field_num)
4542
- {
3576
+ static FrtTermEnum *mr_terms(FrtIndexReader *ir, int field_num) {
4543
3577
  return frt_mte_new(MR(ir), field_num, NULL);
4544
3578
  }
4545
3579
 
4546
- static FrtTermEnum *mr_terms_from(FrtIndexReader *ir, int field_num, const char *term)
4547
- {
3580
+ static FrtTermEnum *mr_terms_from(FrtIndexReader *ir, int field_num, const char *term) {
4548
3581
  return frt_mte_new(MR(ir), field_num, term);
4549
3582
  }
4550
3583
 
4551
- static int mr_doc_freq(FrtIndexReader *ir, int field_num, const char *t)
4552
- {
3584
+ static int mr_doc_freq(FrtIndexReader *ir, int field_num, const char *t) {
4553
3585
  int total = 0; /* sum freqs in segments */
4554
3586
  int i = MR(ir)->r_cnt;
4555
3587
  for (i = MR(ir)->r_cnt - 1; i >= 0; i--) {
@@ -4562,13 +3594,11 @@ static int mr_doc_freq(FrtIndexReader *ir, int field_num, const char *t)
4562
3594
  return total;
4563
3595
  }
4564
3596
 
4565
- static FrtTermDocEnum *mr_term_docs(FrtIndexReader *ir)
4566
- {
3597
+ static FrtTermDocEnum *mr_term_docs(FrtIndexReader *ir) {
4567
3598
  return mtde_new(MR(ir));
4568
3599
  }
4569
3600
 
4570
- static FrtTermDocEnum *mr_term_positions(FrtIndexReader *ir)
4571
- {
3601
+ static FrtTermDocEnum *mr_term_positions(FrtIndexReader *ir) {
4572
3602
  return mtpe_new(MR(ir));
4573
3603
  }
4574
3604
 
@@ -4577,25 +3607,21 @@ static FrtTermVector *mr_term_vector(FrtIndexReader *ir, int doc_num, ID field)
4577
3607
  return reader->term_vector(reader, doc_num - MR(ir)->starts[i], field);
4578
3608
  }
4579
3609
 
4580
- static FrtHash *mr_term_vectors(FrtIndexReader *ir, int doc_num)
4581
- {
3610
+ static FrtHash *mr_term_vectors(FrtIndexReader *ir, int doc_num) {
4582
3611
  GET_READER();
4583
3612
  return reader->term_vectors(reader, doc_num - MR(ir)->starts[i]);
4584
3613
  }
4585
3614
 
4586
- static bool mr_is_deleted(FrtIndexReader *ir, int doc_num)
4587
- {
3615
+ static bool mr_is_deleted(FrtIndexReader *ir, int doc_num) {
4588
3616
  GET_READER();
4589
3617
  return reader->is_deleted(reader, doc_num - MR(ir)->starts[i]);
4590
3618
  }
4591
3619
 
4592
- static bool mr_has_deletions(FrtIndexReader *ir)
4593
- {
3620
+ static bool mr_has_deletions(FrtIndexReader *ir) {
4594
3621
  return MR(ir)->has_deletions;
4595
3622
  }
4596
3623
 
4597
- static void mr_set_norm_i(FrtIndexReader *ir, int doc_num, int field_num, frt_uchar val)
4598
- {
3624
+ static void mr_set_norm_i(FrtIndexReader *ir, int doc_num, int field_num, frt_uchar val) {
4599
3625
  int i = mr_reader_index_i(MR(ir), doc_num);
4600
3626
  int fnum = frt_mr_get_field_num(MR(ir), i, field_num);
4601
3627
  if (fnum >= 0) {
@@ -4606,8 +3632,7 @@ static void mr_set_norm_i(FrtIndexReader *ir, int doc_num, int field_num, frt_uc
4606
3632
  }
4607
3633
  }
4608
3634
 
4609
- static void mr_delete_doc_i(FrtIndexReader *ir, int doc_num)
4610
- {
3635
+ static void mr_delete_doc_i(FrtIndexReader *ir, int doc_num) {
4611
3636
  GET_READER();
4612
3637
  MR(ir)->num_docs_cache = -1; /* invalidate cache */
4613
3638
 
@@ -4617,8 +3642,7 @@ static void mr_delete_doc_i(FrtIndexReader *ir, int doc_num)
4617
3642
  ir->has_changes = true;
4618
3643
  }
4619
3644
 
4620
- static void mr_undelete_all_i(FrtIndexReader *ir)
4621
- {
3645
+ static void mr_undelete_all_i(FrtIndexReader *ir) {
4622
3646
  int i;
4623
3647
  const int mr_reader_cnt = MR(ir)->r_cnt;
4624
3648
 
@@ -4631,8 +3655,7 @@ static void mr_undelete_all_i(FrtIndexReader *ir)
4631
3655
  ir->has_changes = true;
4632
3656
  }
4633
3657
 
4634
- static void mr_set_deleter_i(FrtIndexReader *ir, FrtDeleter *deleter)
4635
- {
3658
+ static void mr_set_deleter_i(FrtIndexReader *ir, FrtDeleter *deleter) {
4636
3659
  int i;
4637
3660
  ir->deleter = deleter;
4638
3661
  for (i = MR(ir)->r_cnt - 1; i >= 0; i--) {
@@ -4641,8 +3664,7 @@ static void mr_set_deleter_i(FrtIndexReader *ir, FrtDeleter *deleter)
4641
3664
  }
4642
3665
  }
4643
3666
 
4644
- static bool mr_is_latest_i(FrtIndexReader *ir)
4645
- {
3667
+ static bool mr_is_latest_i(FrtIndexReader *ir) {
4646
3668
  int i;
4647
3669
  const int mr_reader_cnt = MR(ir)->r_cnt;
4648
3670
  for (i = 0; i < mr_reader_cnt; i++) {
@@ -4653,8 +3675,7 @@ static bool mr_is_latest_i(FrtIndexReader *ir)
4653
3675
  return true;
4654
3676
  }
4655
3677
 
4656
- static void mr_commit_i(FrtIndexReader *ir)
4657
- {
3678
+ static void mr_commit_i(FrtIndexReader *ir) {
4658
3679
  int i;
4659
3680
  const int mr_reader_cnt = MR(ir)->r_cnt;
4660
3681
  for (i = 0; i < mr_reader_cnt; i++) {
@@ -4663,8 +3684,7 @@ static void mr_commit_i(FrtIndexReader *ir)
4663
3684
  }
4664
3685
  }
4665
3686
 
4666
- static void mr_close_i(FrtIndexReader *ir)
4667
- {
3687
+ static void mr_close_i(FrtIndexReader *ir) {
4668
3688
  int i;
4669
3689
  const int mr_reader_cnt = MR(ir)->r_cnt;
4670
3690
  for (i = 0; i < mr_reader_cnt; i++) {
@@ -4870,8 +3890,7 @@ FrtIndexReader *frt_ir_open(FrtIndexReader *ir, FrtStore *store) {
4870
3890
  *
4871
3891
  ****************************************************************************/
4872
3892
 
4873
- static FrtOccurence *occ_new(FrtMemoryPool *mp, int pos)
4874
- {
3893
+ static FrtOccurence *occ_new(FrtMemoryPool *mp, int pos) {
4875
3894
  FrtOccurence *occ = FRT_MP_ALLOC(mp, FrtOccurence);
4876
3895
  occ->pos = pos;
4877
3896
  occ->next = NULL;
@@ -4884,8 +3903,7 @@ static FrtOccurence *occ_new(FrtMemoryPool *mp, int pos)
4884
3903
  *
4885
3904
  ****************************************************************************/
4886
3905
 
4887
- FrtPosting *frt_p_new(FrtMemoryPool *mp, int doc_num, int pos)
4888
- {
3906
+ FrtPosting *frt_p_new(FrtMemoryPool *mp, int doc_num, int pos) {
4889
3907
  FrtPosting *p = FRT_MP_ALLOC(mp, FrtPosting);
4890
3908
  p->doc_num = doc_num;
4891
3909
  p->first_occ = occ_new(mp, pos);
@@ -4901,8 +3919,7 @@ FrtPosting *frt_p_new(FrtMemoryPool *mp, int doc_num, int pos)
4901
3919
  ****************************************************************************/
4902
3920
 
4903
3921
  FrtPostingList *frt_pl_new(FrtMemoryPool *mp, const char *term,
4904
- int term_len, FrtPosting *p)
4905
- {
3922
+ int term_len, FrtPosting *p) {
4906
3923
  // TODO account for term_len as measured in the original text vs utf8 term_len of term
4907
3924
  FrtPostingList *pl = FRT_MP_ALLOC(mp, FrtPostingList);
4908
3925
  pl->term = (char *)frt_mp_memdup(mp, term, term_len + 1);
@@ -4912,20 +3929,17 @@ FrtPostingList *frt_pl_new(FrtMemoryPool *mp, const char *term,
4912
3929
  return pl;
4913
3930
  }
4914
3931
 
4915
- void frt_pl_add_occ(FrtMemoryPool *mp, FrtPostingList *pl, int pos)
4916
- {
3932
+ void frt_pl_add_occ(FrtMemoryPool *mp, FrtPostingList *pl, int pos) {
4917
3933
  pl->last_occ = pl->last_occ->next = occ_new(mp, pos);
4918
3934
  pl->last->freq++;
4919
3935
  }
4920
3936
 
4921
- static void pl_add_posting(FrtPostingList *pl, FrtPosting *p)
4922
- {
3937
+ static void pl_add_posting(FrtPostingList *pl, FrtPosting *p) {
4923
3938
  pl->last = pl->last->next = p;
4924
3939
  pl->last_occ = p->first_occ;
4925
3940
  }
4926
3941
 
4927
- int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2)
4928
- {
3942
+ int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2) {
4929
3943
  return strcmp((*pl1)->term, (*pl2)->term);
4930
3944
  }
4931
3945
 
@@ -4935,8 +3949,7 @@ int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2)
4935
3949
  *
4936
3950
  ****************************************************************************/
4937
3951
 
4938
- static FrtFieldInverter *fld_inv_new(FrtDocWriter *dw, FrtFieldInfo *fi)
4939
- {
3952
+ static FrtFieldInverter *fld_inv_new(FrtDocWriter *dw, FrtFieldInfo *fi) {
4940
3953
  FrtFieldInverter *fld_inv = FRT_MP_ALLOC(dw->mp, FrtFieldInverter);
4941
3954
  fld_inv->is_tokenized = bits_is_tokenized(fi->bits);
4942
3955
  fld_inv->store_term_vector = bits_store_term_vector(fi->bits);
@@ -4953,8 +3966,7 @@ static FrtFieldInverter *fld_inv_new(FrtDocWriter *dw, FrtFieldInfo *fi)
4953
3966
  return fld_inv;
4954
3967
  }
4955
3968
 
4956
- static void fld_inv_destroy(FrtFieldInverter *fld_inv)
4957
- {
3969
+ static void fld_inv_destroy(FrtFieldInverter *fld_inv) {
4958
3970
  frt_h_destroy(fld_inv->plists);
4959
3971
  }
4960
3972
 
@@ -4964,8 +3976,7 @@ static void fld_inv_destroy(FrtFieldInverter *fld_inv)
4964
3976
  *
4965
3977
  ****************************************************************************/
4966
3978
 
4967
- typedef struct SkipBuffer
4968
- {
3979
+ typedef struct SkipBuffer {
4969
3980
  FrtOutStream *buf;
4970
3981
  FrtOutStream *frq_out;
4971
3982
  FrtOutStream *prx_out;
@@ -4974,16 +3985,14 @@ typedef struct SkipBuffer
4974
3985
  frt_off_t last_prx_ptr;
4975
3986
  } SkipBuffer;
4976
3987
 
4977
- static void skip_buf_reset(SkipBuffer *skip_buf)
4978
- {
3988
+ static void skip_buf_reset(SkipBuffer *skip_buf) {
4979
3989
  frt_ramo_reset(skip_buf->buf);
4980
3990
  skip_buf->last_doc = 0;
4981
3991
  skip_buf->last_frq_ptr = frt_os_pos(skip_buf->frq_out);
4982
3992
  skip_buf->last_prx_ptr = frt_os_pos(skip_buf->prx_out);
4983
3993
  }
4984
3994
 
4985
- static SkipBuffer *skip_buf_new(FrtOutStream *frq_out, FrtOutStream *prx_out)
4986
- {
3995
+ static SkipBuffer *skip_buf_new(FrtOutStream *frq_out, FrtOutStream *prx_out) {
4987
3996
  SkipBuffer *skip_buf = FRT_ALLOC(SkipBuffer);
4988
3997
  skip_buf->buf = frt_ram_new_buffer();
4989
3998
  skip_buf->frq_out = frq_out;
@@ -4991,8 +4000,7 @@ static SkipBuffer *skip_buf_new(FrtOutStream *frq_out, FrtOutStream *prx_out)
4991
4000
  return skip_buf;
4992
4001
  }
4993
4002
 
4994
- static void skip_buf_add(SkipBuffer *skip_buf, int doc)
4995
- {
4003
+ static void skip_buf_add(SkipBuffer *skip_buf, int doc) {
4996
4004
  frt_off_t frq_ptr = frt_os_pos(skip_buf->frq_out);
4997
4005
  frt_off_t prx_ptr = frt_os_pos(skip_buf->prx_out);
4998
4006
 
@@ -5005,15 +4013,13 @@ static void skip_buf_add(SkipBuffer *skip_buf, int doc)
5005
4013
  skip_buf->last_prx_ptr = prx_ptr;
5006
4014
  }
5007
4015
 
5008
- static frt_off_t skip_buf_write(SkipBuffer *skip_buf)
5009
- {
4016
+ static frt_off_t skip_buf_write(SkipBuffer *skip_buf) {
5010
4017
  frt_off_t skip_ptr = frt_os_pos(skip_buf->frq_out);
5011
4018
  frt_ramo_write_to(skip_buf->buf, skip_buf->frq_out);
5012
4019
  return skip_ptr;
5013
4020
  }
5014
4021
 
5015
- static void skip_buf_destroy(SkipBuffer *skip_buf)
5016
- {
4022
+ static void skip_buf_destroy(SkipBuffer *skip_buf) {
5017
4023
  frt_ram_destroy_buffer(skip_buf->buf);
5018
4024
  free(skip_buf);
5019
4025
  }
@@ -5024,21 +4030,19 @@ static void skip_buf_destroy(SkipBuffer *skip_buf)
5024
4030
  *
5025
4031
  ****************************************************************************/
5026
4032
 
5027
- static void dw_write_norms(FrtDocWriter *dw, FrtFieldInverter *fld_inv)
5028
- {
4033
+ static void dw_write_norms(FrtDocWriter *dw, FrtFieldInverter *fld_inv) {
5029
4034
  char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
5030
4035
  FrtOutStream *norms_out;
5031
4036
  frt_si_advance_norm_gen(dw->si, fld_inv->fi->number);
5032
4037
  si_norm_file_name(dw->si, file_name, fld_inv->fi->number);
5033
- norms_out = dw->store->new_output(dw->store, file_name);
4038
+ norms_out = dw->store->new_output(dw->store, segm_idx_name, file_name);
5034
4039
  frt_os_write_bytes(norms_out, fld_inv->norms, dw->doc_num);
5035
4040
  frt_os_close(norms_out);
5036
4041
  }
5037
4042
 
5038
4043
  /* we'll use the postings Hash's table area to sort the postings as it is
5039
4044
  * going to be zeroset soon anyway */
5040
- static FrtPostingList **dw_sort_postings(FrtHash *plists_ht)
5041
- {
4045
+ static FrtPostingList **dw_sort_postings(FrtHash *plists_ht) {
5042
4046
  int i, j;
5043
4047
  FrtHashEntry *he;
5044
4048
  FrtPostingList **plists = (FrtPostingList **)plists_ht->table;
@@ -5056,8 +4060,7 @@ static FrtPostingList **dw_sort_postings(FrtHash *plists_ht)
5056
4060
  return plists;
5057
4061
  }
5058
4062
 
5059
- static void dw_flush_streams(FrtDocWriter *dw)
5060
- {
4063
+ static void dw_flush_streams(FrtDocWriter *dw) {
5061
4064
  frt_mp_reset(dw->mp);
5062
4065
  frt_fw_close(dw->fw);
5063
4066
  dw->fw = NULL;
@@ -5065,8 +4068,7 @@ static void dw_flush_streams(FrtDocWriter *dw)
5065
4068
  dw->doc_num = 0;
5066
4069
  }
5067
4070
 
5068
- static void dw_flush(FrtDocWriter *dw)
5069
- {
4071
+ static void dw_flush(FrtDocWriter *dw) {
5070
4072
  int i, j, last_doc, doc_code, doc_freq, last_pos, posting_count;
5071
4073
  int skip_interval = dw->skip_interval;
5072
4074
  FrtFieldInfos *fis = dw->fis;
@@ -5084,9 +4086,9 @@ static void dw_flush(FrtDocWriter *dw)
5084
4086
  SkipBuffer *skip_buf;
5085
4087
 
5086
4088
  sprintf(file_name, "%s.frq", dw->si->name);
5087
- frq_out = store->new_output(store, file_name);
4089
+ frq_out = store->new_output(store, segm_idx_name, file_name);
5088
4090
  sprintf(file_name, "%s.prx", dw->si->name);
5089
- prx_out = store->new_output(store, file_name);
4091
+ prx_out = store->new_output(store, segm_idx_name, file_name);
5090
4092
  skip_buf = skip_buf_new(frq_out, prx_out);
5091
4093
 
5092
4094
  for (i = 0; i < fields_count; i++) {
@@ -5120,8 +4122,7 @@ static void dw_flush(FrtDocWriter *dw)
5120
4122
 
5121
4123
  if (p->freq == 1) {
5122
4124
  frt_os_write_vint(frq_out, 1|doc_code);
5123
- }
5124
- else {
4125
+ } else {
5125
4126
  frt_os_write_vint(frq_out, doc_code);
5126
4127
  frt_os_write_vint(frq_out, p->freq);
5127
4128
  }
@@ -5144,8 +4145,7 @@ static void dw_flush(FrtDocWriter *dw)
5144
4145
  dw_flush_streams(dw);
5145
4146
  }
5146
4147
 
5147
- FrtDocWriter *frt_dw_open(FrtIndexWriter *iw, FrtSegmentInfo *si)
5148
- {
4148
+ FrtDocWriter *frt_dw_open(FrtIndexWriter *iw, FrtSegmentInfo *si) {
5149
4149
  FrtStore *store = iw->store;
5150
4150
  FrtMemoryPool *mp = frt_mp_new_capa(iw->config.chunk_size,
5151
4151
  iw->config.max_buffer_memory/iw->config.chunk_size);
@@ -5177,14 +4177,12 @@ FrtDocWriter *frt_dw_open(FrtIndexWriter *iw, FrtSegmentInfo *si)
5177
4177
  return dw;
5178
4178
  }
5179
4179
 
5180
- void frt_dw_new_segment(FrtDocWriter *dw, FrtSegmentInfo *si)
5181
- {
4180
+ void frt_dw_new_segment(FrtDocWriter *dw, FrtSegmentInfo *si) {
5182
4181
  dw->fw = frt_fw_open(dw->store, si->name, dw->fis);
5183
4182
  dw->si = si;
5184
4183
  }
5185
4184
 
5186
- void frt_dw_close(FrtDocWriter *dw)
5187
- {
4185
+ void frt_dw_close(FrtDocWriter *dw) {
5188
4186
  if (dw->doc_num) {
5189
4187
  dw_flush(dw);
5190
4188
  }
@@ -5199,8 +4197,7 @@ void frt_dw_close(FrtDocWriter *dw)
5199
4197
  free(dw);
5200
4198
  }
5201
4199
 
5202
- FrtFieldInverter *frt_dw_get_fld_inv(FrtDocWriter *dw, FrtFieldInfo *fi)
5203
- {
4200
+ FrtFieldInverter *frt_dw_get_fld_inv(FrtDocWriter *dw, FrtFieldInfo *fi) {
5204
4201
  FrtFieldInverter *fld_inv = (FrtFieldInverter*)frt_h_get_int(dw->fields, fi->number);
5205
4202
 
5206
4203
  if (!fld_inv) {
@@ -5216,8 +4213,7 @@ static void dw_add_posting(FrtMemoryPool *mp,
5216
4213
  int doc_num,
5217
4214
  const char *text,
5218
4215
  int len,
5219
- int pos)
5220
- {
4216
+ int pos) {
5221
4217
  FrtHashEntry *pl_he;
5222
4218
  if (frt_h_set_ext(curr_plists, text, &pl_he)) {
5223
4219
  FrtPosting *p = frt_p_new(mp, doc_num, pos);
@@ -5227,21 +4223,18 @@ static void dw_add_posting(FrtMemoryPool *mp,
5227
4223
  if (frt_h_set_ext(fld_plists, text, &fld_pl_he)) {
5228
4224
  fld_pl_he->value = pl = frt_pl_new(mp, text, len, p);
5229
4225
  pl_he->key = fld_pl_he->key = (char *)pl->term;
5230
- }
5231
- else {
4226
+ } else {
5232
4227
  pl = (FrtPostingList *)fld_pl_he->value;
5233
4228
  pl_add_posting(pl, p);
5234
4229
  pl_he->key = (char *)pl->term;
5235
4230
  }
5236
4231
  pl_he->value = pl;
5237
- }
5238
- else {
4232
+ } else {
5239
4233
  frt_pl_add_occ(mp, (FrtPostingList *)pl_he->value, pos);
5240
4234
  }
5241
4235
  }
5242
4236
 
5243
- static void dw_add_offsets(FrtDocWriter *dw, int pos, frt_off_t start, frt_off_t end)
5244
- {
4237
+ static void dw_add_offsets(FrtDocWriter *dw, int pos, frt_off_t start, frt_off_t end) {
5245
4238
  if (pos >= dw->offsets_capa) {
5246
4239
  int old_capa = dw->offsets_capa;
5247
4240
  while (pos >= dw->offsets_capa) {
@@ -5305,7 +4298,7 @@ FrtHash *frt_dw_invert_field(FrtDocWriter *dw, FrtFieldInverter *fld_inv, FrtDoc
5305
4298
  buf[FRT_MAX_WORD_SIZE - 1] = '\0';
5306
4299
  for (i = 0; i < df_size; i++) {
5307
4300
  int len = df->lengths[i];
5308
- char *data_ptr = df->data[i];
4301
+ const char *data_ptr = df->data[i];
5309
4302
  if (len >= FRT_MAX_WORD_SIZE) {
5310
4303
  char *head_last = rb_enc_left_char_head(data_ptr, data_ptr + FRT_MAX_WORD_SIZE - 1, data_ptr + len, df->encodings[i]);
5311
4304
  len = head_last - data_ptr;
@@ -5347,7 +4340,7 @@ void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc) {
5347
4340
  FrtFieldInverter *fld_inv;
5348
4341
  FrtHash *postings;
5349
4342
  FrtFieldInfo *fi;
5350
- const int doc_size = doc->size;
4343
+ const int doc_size = doc->field_count;
5351
4344
 
5352
4345
  /* frt_fw_add_doc will add new fields as necessary */
5353
4346
  frt_fw_add_doc(dw->fw, doc);
@@ -5384,484 +4377,23 @@ void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc) {
5384
4377
  * IndexWriter
5385
4378
  *
5386
4379
  ****************************************************************************/
5387
- /****************************************************************************
5388
- * SegmentMergeInfo
5389
- ****************************************************************************/
5390
-
5391
- typedef struct SegmentMergeInfo {
5392
- int base;
5393
- int max_doc;
5394
- int doc_cnt;
5395
- FrtSegmentInfo *si;
5396
- FrtStore *store;
5397
- FrtStore *orig_store;
5398
- FrtBitVector *deleted_docs;
5399
- FrtSegmentFieldIndex *sfi;
5400
- FrtTermEnum *te;
5401
- FrtTermDocEnum *tde;
5402
- char *term;
5403
- int *doc_map;
5404
- FrtInStream *frq_in;
5405
- FrtInStream *prx_in;
5406
- } SegmentMergeInfo;
5407
-
5408
- static bool smi_lt(const SegmentMergeInfo *smi1, const SegmentMergeInfo *smi2)
5409
- {
5410
- int cmpres = strcmp(smi1->term, smi2->term);
5411
- if (0 == cmpres) {
5412
- return smi1->base < smi2->base;
5413
- }
5414
- else {
5415
- return cmpres < 0;
5416
- }
5417
- }
5418
-
5419
- static void smi_load_doc_map(SegmentMergeInfo *smi)
5420
- {
5421
- FrtBitVector *deleted_docs = smi->deleted_docs;
5422
- const int max_doc = smi->max_doc;
5423
- int j = 0, i;
5424
-
5425
- smi->doc_map = FRT_ALLOC_N(int, max_doc);
5426
- for (i = 0; i < max_doc; i++) {
5427
- if (frt_bv_get(deleted_docs, i)) {
5428
- smi->doc_map[i] = -1;
5429
- }
5430
- else {
5431
- smi->doc_map[i] = j++;
5432
- }
5433
- }
5434
- smi->doc_cnt = j;
5435
- }
5436
-
5437
- static SegmentMergeInfo *smi_new(int base, FrtStore *store, FrtSegmentInfo *si)
5438
- {
5439
- SegmentMergeInfo *smi = FRT_ALLOC_AND_ZERO(SegmentMergeInfo);
5440
- char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
5441
- char *segment = si->name;
5442
- smi->base = base;
5443
- smi->si = si;
5444
- smi->orig_store = smi->store = store;
5445
- FRT_REF(smi->orig_store);
5446
-
5447
- sprintf(file_name, "%s.fdx", segment);
5448
- smi->doc_cnt = smi->max_doc
5449
- = smi->store->length(smi->store, file_name) / FIELDS_IDX_PTR_SIZE;
5450
-
5451
- if (si->del_gen >= 0) {
5452
- frt_fn_for_generation(file_name, segment, "del", si->del_gen);
5453
- smi->deleted_docs = bv_read(store, file_name);
5454
- smi_load_doc_map(smi);
5455
- }
5456
- return smi;
5457
- }
5458
-
5459
- static void smi_load_term_input(SegmentMergeInfo *smi)
5460
- {
5461
- FrtStore *store = smi->store;
5462
- char *segment = smi->si->name;
5463
- char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
5464
- smi->sfi = frt_sfi_open(store, segment);
5465
- sprintf(file_name, "%s.tis", segment);
5466
- FrtInStream *is = store->open_input(store, file_name);
5467
- FRT_DEREF(is);
5468
- smi->te = TE(frt_ste_new(is, smi->sfi));
5469
- sprintf(file_name, "%s.frq", segment);
5470
- smi->frq_in = store->open_input(store, file_name);
5471
- sprintf(file_name, "%s.prx", segment);
5472
- smi->prx_in = store->open_input(store, file_name);
5473
- smi->tde = frt_stpe_new(NULL, smi->frq_in, smi->prx_in, smi->deleted_docs,
5474
- STE(smi->te)->skip_interval);
5475
- }
5476
-
5477
- static void smi_close_term_input(SegmentMergeInfo *smi)
5478
- {
5479
- frt_ste_close(smi->te);
5480
- frt_sfi_close(smi->sfi);
5481
- stpe_close(smi->tde);
5482
- frt_is_close(smi->frq_in);
5483
- frt_is_close(smi->prx_in);
5484
- }
5485
-
5486
- static void smi_destroy(SegmentMergeInfo *smi)
5487
- {
5488
- if (smi->store != smi->orig_store) {
5489
- frt_store_close(smi->store);
5490
- }
5491
- frt_store_close(smi->orig_store);
5492
- if (smi->deleted_docs) {
5493
- frt_bv_destroy(smi->deleted_docs);
5494
- free(smi->doc_map);
5495
- }
5496
- free(smi);
5497
- }
5498
-
5499
- static char *smi_next(SegmentMergeInfo *smi)
5500
- {
5501
- return (smi->term = ste_next(smi->te));
5502
- }
5503
-
5504
- /****************************************************************************
5505
- * SegmentMerger
5506
- ****************************************************************************/
5507
-
5508
- typedef struct SegmentMerger {
5509
- FrtTermInfo ti;
5510
- FrtStore *store;
5511
- FrtFieldInfos *fis;
5512
- FrtSegmentInfo *si;
5513
- SegmentMergeInfo **smis;
5514
- int seg_cnt;
5515
- int doc_cnt;
5516
- FrtConfig *config;
5517
- FrtTermInfosWriter *tiw;
5518
- char *term_buf;
5519
- int term_buf_ptr;
5520
- int term_buf_size;
5521
- FrtPriorityQueue *queue;
5522
- SkipBuffer *skip_buf;
5523
- FrtOutStream *frq_out;
5524
- FrtOutStream *prx_out;
5525
- } SegmentMerger;
5526
-
5527
- static SegmentMerger *sm_create(FrtIndexWriter *iw, FrtSegmentInfo *si, FrtSegmentInfo **seg_infos, const int seg_cnt)
5528
- {
5529
- int i;
5530
- SegmentMerger *sm = FRT_ALLOC_AND_ZERO_N(SegmentMerger, seg_cnt);
5531
- sm->store = iw->store;
5532
- FRT_REF(sm->store);
5533
- sm->fis = iw->fis;
5534
- sm->si = si;
5535
- sm->doc_cnt = 0;
5536
- sm->smis = FRT_ALLOC_N(SegmentMergeInfo *, seg_cnt);
5537
- for (i = 0; i < seg_cnt; i++) {
5538
- sm->smis[i] = smi_new(sm->doc_cnt, seg_infos[i]->store, seg_infos[i]);
5539
- sm->doc_cnt += sm->smis[i]->doc_cnt;
5540
- }
5541
- sm->seg_cnt = seg_cnt;
5542
- sm->config = &iw->config;
5543
- return sm;
5544
- }
5545
-
5546
- static void sm_destroy(SegmentMerger *sm)
5547
- {
5548
- int i;
5549
- const int seg_cnt = sm->seg_cnt;
5550
- for (i = 0; i < seg_cnt; i++) {
5551
- smi_destroy(sm->smis[i]);
5552
- }
5553
- frt_store_close(sm->store);
5554
- free(sm->smis);
5555
- free(sm);
5556
- }
5557
-
5558
- static void sm_merge_fields(SegmentMerger *sm)
5559
- {
5560
- int i, j;
5561
- frt_off_t start, end = 0;
5562
- char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
5563
- FrtOutStream *fdt_out, *fdx_out;
5564
- FrtStore *store = sm->store;
5565
- const int seg_cnt = sm->seg_cnt;
5566
-
5567
- sprintf(file_name, "%s.fdt", sm->si->name);
5568
- fdt_out = store->new_output(store, file_name);
5569
-
5570
- sprintf(file_name, "%s.fdx", sm->si->name);
5571
- fdx_out = store->new_output(store, file_name);
5572
-
5573
- for (i = 0; i < seg_cnt; i++) {
5574
- SegmentMergeInfo *smi = sm->smis[i];
5575
- const int max_doc = smi->max_doc;
5576
- FrtInStream *fdt_in, *fdx_in;
5577
- char *segment = smi->si->name;
5578
- store = smi->store;
5579
- sprintf(file_name, "%s.fdt", segment);
5580
- fdt_in = store->open_input(store, file_name);
5581
- sprintf(file_name, "%s.fdx", segment);
5582
- fdx_in = store->open_input(store, file_name);
5583
-
5584
- if (max_doc > 0) {
5585
- end = (off_t)frt_is_read_u64(fdx_in);
5586
- }
5587
- for (j = 0; j < max_doc; j++) {
5588
- frt_u32 tv_idx_offset = frt_is_read_u32(fdx_in);
5589
- start = end;
5590
- if (j == max_doc - 1) {
5591
- end = frt_is_length(fdt_in);
5592
- }
5593
- else {
5594
- end = (off_t)frt_is_read_u64(fdx_in);
5595
- }
5596
- /* skip deleted docs */
5597
- if (!smi->deleted_docs || !frt_bv_get(smi->deleted_docs, j)) {
5598
- frt_os_write_u64(fdx_out, frt_os_pos(fdt_out));
5599
- frt_os_write_u32(fdx_out, tv_idx_offset);
5600
- frt_is_seek(fdt_in, start);
5601
- frt_is2os_copy_bytes(fdt_in, fdt_out, end - start);
5602
- }
5603
- }
5604
- frt_is_close(fdt_in);
5605
- frt_is_close(fdx_in);
5606
- }
5607
- frt_os_close(fdt_out);
5608
- frt_os_close(fdx_out);
5609
- }
5610
-
5611
- static int sm_append_postings(SegmentMerger *sm, SegmentMergeInfo **matches,
5612
- const int match_size)
5613
- {
5614
- int i;
5615
- int last_doc = 0, base, doc, doc_code, freq;
5616
- int skip_interval = sm->config->skip_interval;
5617
- int *doc_map = NULL;
5618
- int df = 0; /* number of docs w/ term */
5619
- FrtTermDocEnum *tde;
5620
- SegmentMergeInfo *smi;
5621
- SkipBuffer *skip_buf = sm->skip_buf;
5622
- skip_buf_reset(skip_buf);
5623
-
5624
- for (i = 0; i < match_size; i++) {
5625
- smi = matches[i];
5626
- base = smi->base;
5627
- doc_map = smi->doc_map;
5628
- tde = smi->tde;
5629
- stpe_seek_ti(STDE(tde), &smi->te->curr_ti);
5630
-
5631
- /* since we are using copy_bytes below to copy the proximities we use
5632
- * stde_next rather than stpe_next here */
5633
- while (stde_next(tde)) {
5634
- doc = stde_doc_num(tde);
5635
- if (NULL != doc_map) {
5636
- doc = doc_map[doc]; /* work around deletions */
5637
- }
5638
- doc += base; /* convert to merged space */
5639
- assert(doc == 0 || doc > last_doc);
5640
-
5641
- df++;
5642
- if (0 == (df % skip_interval)) {
5643
- skip_buf_add(skip_buf, last_doc);
5644
- }
5645
-
5646
- doc_code = (doc - last_doc) << 1; /* use low bit to flag freq=1 */
5647
- last_doc = doc;
5648
-
5649
- freq = stde_freq(tde);
5650
- if (freq == 1) {
5651
- frt_os_write_vint(sm->frq_out, doc_code | 1); /* doc & freq=1 */
5652
- }
5653
- else {
5654
- frt_os_write_vint(sm->frq_out, doc_code); /* write doc */
5655
- frt_os_write_vint(sm->frq_out, freq); /* write freqency in doc */
5656
- }
5657
-
5658
- /* copy position deltas */
5659
- frt_is2os_copy_vints(STDE(tde)->prx_in, sm->prx_out, freq);
5660
- }
5661
- }
5662
- return df;
5663
- }
5664
-
5665
- static char *sm_cache_term(SegmentMerger *sm, char *term, int term_len)
5666
- {
5667
- term = (char *)memcpy(sm->term_buf + sm->term_buf_ptr, term, term_len + 1);
5668
- sm->term_buf_ptr += term_len + 1;
5669
- if (sm->term_buf_ptr > sm->term_buf_size) {
5670
- sm->term_buf_ptr = 0;
5671
- }
5672
- return term;
5673
- }
5674
-
5675
- static void sm_merge_term_info(SegmentMerger *sm, SegmentMergeInfo **matches,
5676
- int match_size)
5677
- {
5678
- frt_off_t frq_ptr = frt_os_pos(sm->frq_out);
5679
- frt_off_t prx_ptr = frt_os_pos(sm->prx_out);
5680
-
5681
- int df = sm_append_postings(sm, matches, match_size); /* append posting data */
5682
-
5683
- frt_off_t skip_ptr = skip_buf_write(sm->skip_buf);
5684
-
5685
- if (df > 0) {
5686
- /* add an entry to the dictionary with ptrs to prox and freq files */
5687
- SegmentMergeInfo *first_match = matches[0];
5688
- int term_len = first_match->te->curr_term_len;
5689
-
5690
- frt_ti_set(sm->ti, df, frq_ptr, prx_ptr,
5691
- (skip_ptr - frq_ptr));
5692
- frt_tiw_add(sm->tiw, sm_cache_term(sm, first_match->term, term_len),
5693
- term_len, &sm->ti);
5694
- }
5695
- }
5696
-
5697
- static void sm_merge_term_infos(SegmentMerger *sm)
5698
- {
5699
- int i, j, match_size;
5700
- SegmentMergeInfo *smi, *top, **matches;
5701
- char *term;
5702
- const int seg_cnt = sm->seg_cnt;
5703
- const int fis_size = sm->fis->size;
5704
-
5705
- matches = FRT_ALLOC_N(SegmentMergeInfo *, seg_cnt);
5706
-
5707
- for (j = 0; j < seg_cnt; j++) {
5708
- smi_load_term_input(sm->smis[j]);
5709
- }
5710
-
5711
- for (i = 0; i < fis_size; i++) {
5712
- frt_tiw_start_field(sm->tiw, i);
5713
- for (j = 0; j < seg_cnt; j++) {
5714
- smi = sm->smis[j];
5715
- ste_set_field(smi->te, i);
5716
- if (NULL != smi_next(smi)) {
5717
- frt_pq_push(sm->queue, smi); /* initialize @queue */
5718
- }
5719
- }
5720
- while (sm->queue->size > 0) {
5721
- match_size = 0; /* pop matching terms */
5722
- matches[0] = (SegmentMergeInfo *)frt_pq_pop(sm->queue);
5723
- match_size++;
5724
- term = matches[0]->term;
5725
- top = (SegmentMergeInfo *)frt_pq_top(sm->queue);
5726
- while ((NULL != top) && (0 == strcmp(term, top->term))) {
5727
- matches[match_size] = (SegmentMergeInfo *)frt_pq_pop(sm->queue);
5728
- match_size++;
5729
- top = (SegmentMergeInfo *)frt_pq_top(sm->queue);
5730
- }
5731
-
5732
- sm_merge_term_info(sm, matches, match_size);/* add new FrtTermInfo */
5733
-
5734
- while (match_size > 0) {
5735
- match_size--;
5736
- smi = matches[match_size];
5737
- if (NULL != smi_next(smi)) {
5738
- frt_pq_push(sm->queue, smi); /* restore queue */
5739
- }
5740
- }
5741
- }
5742
- }
5743
- free(matches);
5744
- for (j = 0; j < seg_cnt; j++) {
5745
- smi_close_term_input(sm->smis[j]);
5746
- }
5747
- }
5748
-
5749
- static void sm_merge_terms(SegmentMerger *sm)
5750
- {
5751
- char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
5752
-
5753
- sprintf(file_name, "%s.frq", sm->si->name);
5754
- sm->frq_out = sm->store->new_output(sm->store, file_name);
5755
- sprintf(file_name, "%s.prx", sm->si->name);
5756
- sm->prx_out = sm->store->new_output(sm->store, file_name);
5757
-
5758
- sm->tiw = frt_tiw_open(sm->store, sm->si->name, sm->config->index_interval,
5759
- sm->config->skip_interval);
5760
- sm->skip_buf = skip_buf_new(sm->frq_out, sm->prx_out);
5761
-
5762
- /* terms_buf_ptr holds a buffer of terms since the FrtTermInfosWriter needs
5763
- * to keep the last index_interval terms so that it can compare the last
5764
- * term put in the index with the next one. So the size of the buffer must
5765
- * by index_interval + 2. */
5766
- sm->term_buf_ptr = 0;
5767
- sm->term_buf_size = (sm->config->index_interval + 1) * FRT_MAX_WORD_SIZE;
5768
- sm->term_buf = FRT_ALLOC_N(char, sm->term_buf_size + FRT_MAX_WORD_SIZE);
5769
-
5770
- sm->queue = frt_pq_new(sm->seg_cnt, (frt_lt_ft)&smi_lt, NULL);
5771
-
5772
- sm_merge_term_infos(sm);
5773
-
5774
- frt_os_close(sm->frq_out);
5775
- frt_os_close(sm->prx_out);
5776
- frt_tiw_close(sm->tiw);
5777
- frt_pq_destroy(sm->queue);
5778
- skip_buf_destroy(sm->skip_buf);
5779
- free(sm->term_buf);
5780
- }
5781
-
5782
- static void sm_merge_norms(SegmentMerger *sm)
5783
- {
5784
- FrtSegmentInfo *si;
5785
- int i, j, k;
5786
- FrtStore *store;
5787
- frt_uchar byte;
5788
- FrtFieldInfo *fi;
5789
- FrtOutStream *os;
5790
- FrtInStream *is;
5791
- char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
5792
- SegmentMergeInfo *smi;
5793
- const int seg_cnt = sm->seg_cnt;
5794
- for (i = sm->fis->size - 1; i >= 0; i--) {
5795
- fi = sm->fis->fields[i];
5796
- if (bits_has_norms(fi->bits)) {
5797
- si = sm->si;
5798
- frt_si_advance_norm_gen(si, i);
5799
- si_norm_file_name(si, file_name, i);
5800
- os = sm->store->new_output(sm->store, file_name);
5801
- for (j = 0; j < seg_cnt; j++) {
5802
- smi = sm->smis[j];
5803
- si = smi->si;
5804
- if (si_norm_file_name(si, file_name, i)) {
5805
- const int max_doc = smi->max_doc;
5806
- FrtBitVector *deleted_docs = smi->deleted_docs;
5807
- store = smi->store;
5808
- is = store->open_input(store, file_name);
5809
- if (deleted_docs) {
5810
- for (k = 0; k < max_doc; k++) {
5811
- byte = frt_is_read_byte(is);
5812
- if (!frt_bv_get(deleted_docs, k)) {
5813
- frt_os_write_byte(os, byte);
5814
- }
5815
- }
5816
- }
5817
- else {
5818
- frt_is2os_copy_bytes(is, os, max_doc);
5819
- }
5820
- frt_is_close(is);
5821
- }
5822
- else {
5823
- const int doc_cnt = smi->doc_cnt;
5824
- for (k = 0; k < doc_cnt; k++) {
5825
- frt_os_write_byte(os, '\0');
5826
- }
5827
- }
5828
- }
5829
- frt_os_close(os);
5830
- }
5831
- }
5832
- }
5833
-
5834
- static int sm_merge(SegmentMerger *sm)
5835
- {
5836
- sm_merge_fields(sm);
5837
- sm_merge_terms(sm);
5838
- sm_merge_norms(sm);
5839
- return sm->doc_cnt;
5840
- }
5841
-
5842
-
5843
- /****************************************************************************
5844
- * IndexWriter
5845
- ****************************************************************************/
5846
4380
 
5847
4381
  /* prepare an index ready for writing */
5848
- void frt_index_create(FrtStore *store, FrtFieldInfos *fis)
5849
- {
4382
+ void frt_index_create(FrtStore *store, FrtFieldInfos *fis) {
5850
4383
  FrtSegmentInfos *sis = frt_sis_new(fis);
5851
- store->clear_all(store);
4384
+ store->clear_all(store, segm_idx_name);
5852
4385
  frt_sis_write(sis, store, NULL);
5853
4386
  frt_sis_destroy(sis);
5854
4387
  }
5855
4388
 
5856
4389
  bool frt_index_is_locked(FrtStore *store) {
5857
- FrtLock *write_lock = frt_open_lock(store, FRT_WRITE_LOCK_NAME);
4390
+ FrtLock *write_lock = frt_open_lock(store, segm_idx_name, FRT_WRITE_LOCK_NAME);
5858
4391
  bool is_locked = write_lock->is_locked(write_lock);
5859
4392
  frt_close_lock(write_lock);
5860
4393
  return is_locked;
5861
4394
  }
5862
4395
 
5863
- int frt_iw_doc_count(FrtIndexWriter *iw)
5864
- {
4396
+ int frt_iw_doc_count(FrtIndexWriter *iw) {
5865
4397
  int i, doc_cnt = 0;
5866
4398
  pthread_mutex_lock(&iw->mutex);
5867
4399
  for (i = iw->sis->size - 1; i >= 0; i--) {
@@ -5874,68 +4406,6 @@ int frt_iw_doc_count(FrtIndexWriter *iw)
5874
4406
  return doc_cnt;
5875
4407
  }
5876
4408
 
5877
- static void iw_merge_segments(FrtIndexWriter *iw, const int min_seg, const int max_seg) {
5878
- int i;
5879
- FrtSegmentInfos *sis = iw->sis;
5880
- FrtSegmentInfo *si = frt_sis_new_segment(sis, 0, iw->store);
5881
-
5882
- SegmentMerger *merger = sm_create(iw, si, &sis->segs[min_seg], max_seg - min_seg);
5883
-
5884
- /* This is where all the action happens. */
5885
- si->doc_cnt = sm_merge(merger);
5886
-
5887
- pthread_mutex_lock(&iw->store->mutex);
5888
- /* delete merged segments */
5889
- for (i = min_seg; i < max_seg; i++) {
5890
- si_delete_files(sis->segs[i], iw->fis, iw->deleter);
5891
- }
5892
-
5893
- frt_sis_del_from_to(sis, min_seg, max_seg);
5894
-
5895
- frt_sis_write(sis, iw->store, iw->deleter);
5896
- deleter_commit_pending_deletions(iw->deleter);
5897
-
5898
- pthread_mutex_unlock(&iw->store->mutex);
5899
-
5900
- sm_destroy(merger);
5901
- }
5902
-
5903
- static void iw_merge_segments_from(FrtIndexWriter *iw, int min_segment)
5904
- {
5905
- iw_merge_segments(iw, min_segment, iw->sis->size);
5906
- }
5907
-
5908
- static void iw_maybe_merge_segments(FrtIndexWriter *iw)
5909
- {
5910
- int target_merge_docs = iw->config.merge_factor;
5911
- int min_segment, merge_docs;
5912
- FrtSegmentInfo *si;
5913
-
5914
- while (target_merge_docs > 0
5915
- && target_merge_docs <= iw->config.max_merge_docs) {
5916
- /* find segments smaller than current target size */
5917
- min_segment = iw->sis->size - 1;
5918
- merge_docs = 0;
5919
- while (min_segment >= 0) {
5920
- si = iw->sis->segs[min_segment];
5921
- if (si->doc_cnt >= target_merge_docs) {
5922
- break;
5923
- }
5924
- merge_docs += si->doc_cnt;
5925
- min_segment--;
5926
- }
5927
-
5928
- if (merge_docs >= target_merge_docs) { /* found a merge to do */
5929
- iw_merge_segments_from(iw, min_segment + 1);
5930
- }
5931
- else if (min_segment <= 0) {
5932
- break;
5933
- }
5934
-
5935
- target_merge_docs *= iw->config.merge_factor;
5936
- }
5937
- }
5938
-
5939
4409
  static void iw_flush_ram_segment(FrtIndexWriter *iw) {
5940
4410
  FrtSegmentInfos *sis = iw->sis;
5941
4411
  FrtSegmentInfo *si;
@@ -5947,16 +4417,13 @@ static void iw_flush_ram_segment(FrtIndexWriter *iw) {
5947
4417
  frt_sis_write(iw->sis, iw->store, iw->deleter);
5948
4418
  deleter_commit_pending_deletions(iw->deleter);
5949
4419
  pthread_mutex_unlock(&iw->store->mutex);
5950
- // iw_maybe_merge_segments(iw);
5951
4420
  }
5952
4421
 
5953
- void frt_iw_add_doc(FrtIndexWriter *iw, FrtDocument *doc)
5954
- {
4422
+ void frt_iw_add_doc(FrtIndexWriter *iw, FrtDocument *doc) {
5955
4423
  pthread_mutex_lock(&iw->mutex);
5956
4424
  if (NULL == iw->dw) {
5957
4425
  iw->dw = frt_dw_open(iw, frt_sis_new_segment(iw->sis, 0, iw->store));
5958
- }
5959
- else if (NULL == iw->dw->fw) {
4426
+ } else if (NULL == iw->dw->fw) {
5960
4427
  frt_dw_new_segment(iw->dw, frt_sis_new_segment(iw->sis, 0, iw->store));
5961
4428
  }
5962
4429
  frt_dw_add_doc(iw->dw, doc);
@@ -5967,15 +4434,13 @@ void frt_iw_add_doc(FrtIndexWriter *iw, FrtDocument *doc)
5967
4434
  pthread_mutex_unlock(&iw->mutex);
5968
4435
  }
5969
4436
 
5970
- static void iw_commit_i(FrtIndexWriter *iw)
5971
- {
4437
+ static void iw_commit_i(FrtIndexWriter *iw) {
5972
4438
  if (iw->dw && iw->dw->doc_num > 0) {
5973
4439
  iw_flush_ram_segment(iw);
5974
4440
  }
5975
4441
  }
5976
4442
 
5977
- void frt_iw_commit(FrtIndexWriter *iw)
5978
- {
4443
+ void frt_iw_commit(FrtIndexWriter *iw) {
5979
4444
  pthread_mutex_lock(&iw->mutex);
5980
4445
  iw_commit_i(iw);
5981
4446
  pthread_mutex_unlock(&iw->mutex);
@@ -6051,28 +4516,7 @@ void frt_iw_delete_terms(FrtIndexWriter *iw, ID field, char **terms, const int t
6051
4516
  }
6052
4517
  }
6053
4518
 
6054
- static void iw_optimize_i(FrtIndexWriter *iw)
6055
- {
6056
- int min_segment;
6057
- iw_commit_i(iw);
6058
- // while (iw->sis->size > 1
6059
- // || (iw->sis->size == 1
6060
- // && (frt_si_has_deletions(iw->sis->segs[0])
6061
- // || (iw->sis->segs[0]->store != iw->store)))) {
6062
- // min_segment = iw->sis->size - iw->config.merge_factor;
6063
- // iw_merge_segments_from(iw, min_segment < 0 ? 0 : min_segment);
6064
- // }
6065
- }
6066
-
6067
- void frt_iw_optimize(FrtIndexWriter *iw)
6068
- {
6069
- pthread_mutex_lock(&iw->mutex);
6070
- iw_optimize_i(iw);
6071
- pthread_mutex_unlock(&iw->mutex);
6072
- }
6073
-
6074
- void frt_iw_close(FrtIndexWriter *iw)
6075
- {
4519
+ void frt_iw_close(FrtIndexWriter *iw) {
6076
4520
  pthread_mutex_lock(&iw->mutex);
6077
4521
  iw_commit_i(iw);
6078
4522
  if (iw->dw) {
@@ -6108,7 +4552,7 @@ FrtIndexWriter *frt_iw_open(FrtIndexWriter *iw, FrtStore *store, FrtAnalyzer *vo
6108
4552
  iw->config = *config;
6109
4553
 
6110
4554
  FRT_TRY
6111
- iw->write_lock = frt_open_lock(store, FRT_WRITE_LOCK_NAME);
4555
+ iw->write_lock = frt_open_lock(store, segm_idx_name, FRT_WRITE_LOCK_NAME);
6112
4556
  if (!iw->write_lock->obtain(iw->write_lock)) {
6113
4557
  FRT_RAISE(FRT_LOCK_ERROR, "Couldn't obtain write lock when opening IndexWriter");
6114
4558
  }
@@ -6150,21 +4594,21 @@ static void iw_cp_fields(FrtIndexWriter *iw, FrtSegmentReader *sr, const char *s
6150
4594
  char *sr_segment = sr->si->name;
6151
4595
 
6152
4596
  sprintf(file_name, "%s.fdt", segment);
6153
- fdt_out = store_out->new_output(store_out, file_name);
4597
+ fdt_out = store_out->new_output(store_out, segm_idx_name, file_name);
6154
4598
  sprintf(file_name, "%s.fdx", segment);
6155
- fdx_out = store_out->new_output(store_out, file_name);
4599
+ fdx_out = store_out->new_output(store_out, segm_idx_name, file_name);
6156
4600
 
6157
4601
  sprintf(file_name, "%s.fdt", sr_segment);
6158
- fdt_in = store_in->open_input(store_in, file_name);
4602
+ fdt_in = store_in->open_input(store_in, segm_idx_name, file_name);
6159
4603
  sprintf(file_name, "%s.fdx", sr_segment);
6160
- fdx_in = store_in->open_input(store_in, file_name);
4604
+ fdx_in = store_in->open_input(store_in, segm_idx_name, file_name);
6161
4605
 
6162
4606
  sprintf(file_name, "%s.del", sr_segment);
6163
- if (store_in->exists(store_in, file_name)) {
4607
+ if (store_in->exists(store_in, segm_idx_name, file_name)) {
6164
4608
  FrtOutStream *del_out;
6165
- FrtInStream *del_in = store_in->open_input(store_in, file_name);
4609
+ FrtInStream *del_in = store_in->open_input(store_in, segm_idx_name, file_name);
6166
4610
  sprintf(file_name, "%s.del", segment);
6167
- del_out = store_out->new_output(store_out, file_name);
4611
+ del_out = store_out->new_output(store_out, segm_idx_name, file_name);
6168
4612
  frt_is2os_copy_bytes(del_in, del_out, frt_is_length(del_in));
6169
4613
  frt_os_close(del_out);
6170
4614
  frt_is_close(del_in);
@@ -6231,8 +4675,7 @@ static void iw_cp_fields(FrtIndexWriter *iw, FrtSegmentReader *sr, const char *s
6231
4675
  }
6232
4676
 
6233
4677
  static void iw_cp_terms(FrtIndexWriter *iw, FrtSegmentReader *sr,
6234
- const char *segment, int *map)
6235
- {
4678
+ const char *segment, int *map) {
6236
4679
  char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
6237
4680
  FrtOutStream *tix_out, *tis_out, *tfx_out, *frq_out, *prx_out;
6238
4681
  FrtInStream *tix_in, *tis_in, *tfx_in, *frq_in, *prx_in;
@@ -6241,29 +4684,29 @@ static void iw_cp_terms(FrtIndexWriter *iw, FrtSegmentReader *sr,
6241
4684
  char *sr_segment = sr->si->name;
6242
4685
 
6243
4686
  sprintf(file_name, "%s.tix", segment);
6244
- tix_out = store_out->new_output(store_out, file_name);
4687
+ tix_out = store_out->new_output(store_out, segm_idx_name, file_name);
6245
4688
  sprintf(file_name, "%s.tix", sr_segment);
6246
- tix_in = store_in->open_input(store_in, file_name);
4689
+ tix_in = store_in->open_input(store_in, segm_idx_name, file_name);
6247
4690
 
6248
4691
  sprintf(file_name, "%s.tis", segment);
6249
- tis_out = store_out->new_output(store_out, file_name);
4692
+ tis_out = store_out->new_output(store_out, segm_idx_name, file_name);
6250
4693
  sprintf(file_name, "%s.tis", sr_segment);
6251
- tis_in = store_in->open_input(store_in, file_name);
4694
+ tis_in = store_in->open_input(store_in, segm_idx_name, file_name);
6252
4695
 
6253
4696
  sprintf(file_name, "%s.tfx", segment);
6254
- tfx_out = store_out->new_output(store_out, file_name);
4697
+ tfx_out = store_out->new_output(store_out, segm_idx_name, file_name);
6255
4698
  sprintf(file_name, "%s.tfx", sr_segment);
6256
- tfx_in = store_in->open_input(store_in, file_name);
4699
+ tfx_in = store_in->open_input(store_in, segm_idx_name, file_name);
6257
4700
 
6258
4701
  sprintf(file_name, "%s.frq", segment);
6259
- frq_out = store_out->new_output(store_out, file_name);
4702
+ frq_out = store_out->new_output(store_out, segm_idx_name, file_name);
6260
4703
  sprintf(file_name, "%s.frq", sr_segment);
6261
- frq_in = store_in->open_input(store_in, file_name);
4704
+ frq_in = store_in->open_input(store_in, segm_idx_name, file_name);
6262
4705
 
6263
4706
  sprintf(file_name, "%s.prx", segment);
6264
- prx_out = store_out->new_output(store_out, file_name);
4707
+ prx_out = store_out->new_output(store_out, segm_idx_name, file_name);
6265
4708
  sprintf(file_name, "%s.prx", sr_segment);
6266
- prx_in = store_in->open_input(store_in, file_name);
4709
+ prx_in = store_in->open_input(store_in, segm_idx_name, file_name);
6267
4710
 
6268
4711
  if (map) {
6269
4712
  int field_cnt = frt_is_read_u32(tfx_in);
@@ -6278,8 +4721,7 @@ static void iw_cp_terms(FrtIndexWriter *iw, FrtSegmentReader *sr,
6278
4721
  frt_os_write_vint(tfx_out, frt_is_read_vint(tfx_in)); /* index size */
6279
4722
  frt_os_write_vint(tfx_out, frt_is_read_vint(tfx_in)); /* dict size */
6280
4723
  }
6281
- }
6282
- else {
4724
+ } else {
6283
4725
  frt_is2os_copy_bytes(tfx_in, tfx_out, frt_is_length(tfx_in));
6284
4726
  }
6285
4727
  frt_is2os_copy_bytes(tix_in, tix_out, frt_is_length(tix_in));
@@ -6300,8 +4742,7 @@ static void iw_cp_terms(FrtIndexWriter *iw, FrtSegmentReader *sr,
6300
4742
  }
6301
4743
 
6302
4744
  static void iw_cp_norms(FrtIndexWriter *iw, FrtSegmentReader *sr,
6303
- FrtSegmentInfo *si, int *map)
6304
- {
4745
+ FrtSegmentInfo *si, int *map) {
6305
4746
  int i;
6306
4747
  FrtFieldInfos *fis = IR(sr)->fis;
6307
4748
  const int field_cnt = fis->size;
@@ -6317,10 +4758,10 @@ static void iw_cp_norms(FrtIndexWriter *iw, FrtSegmentReader *sr,
6317
4758
  FrtStore *store = IR(sr)->store;
6318
4759
  int field_num = map ? map[i] : i;
6319
4760
 
6320
- norms_in = store->open_input(store, file_name_in);
4761
+ norms_in = store->open_input(store, segm_idx_name, file_name_in);
6321
4762
  frt_si_advance_norm_gen(si, field_num);
6322
4763
  si_norm_file_name(si, file_name_out, field_num);
6323
- norms_out = store_out->new_output(store_out, file_name_out);
4764
+ norms_out = store_out->new_output(store_out, segm_idx_name, file_name_out);
6324
4765
  frt_is2os_copy_bytes(norms_in, norms_out, frt_is_length(norms_in));
6325
4766
  frt_os_close(norms_out);
6326
4767
  frt_is_close(norms_in);
@@ -6398,7 +4839,6 @@ static void iw_add_segments(FrtIndexWriter *iw, FrtIndexReader *ir) {
6398
4839
  void frt_iw_add_readers(FrtIndexWriter *iw, FrtIndexReader **readers, const int r_cnt) {
6399
4840
  int i;
6400
4841
  pthread_mutex_lock(&iw->mutex);
6401
- iw_optimize_i(iw);
6402
4842
 
6403
4843
  for (i = 0; i < r_cnt; i++) {
6404
4844
  iw_add_segments(iw, readers[i]);
@@ -6410,6 +4850,5 @@ void frt_iw_add_readers(FrtIndexWriter *iw, FrtIndexReader **readers, const int
6410
4850
  frt_sis_write(iw->sis, iw->store, iw->deleter);
6411
4851
  pthread_mutex_unlock(&iw->store->mutex);
6412
4852
 
6413
- iw_optimize_i(iw);
6414
4853
  pthread_mutex_unlock(&iw->mutex);
6415
4854
  }