ferret 0.9.1 → 0.9.2

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.
Files changed (105) hide show
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/index_rw.c CHANGED
@@ -1,4 +1,4 @@
1
- #include <index.h>
1
+ #include "index.h"
2
2
  #include <stdlib.h>
3
3
  #include <string.h>
4
4
  #include <array.h>
@@ -24,11 +24,11 @@ const char *VECTOR_EXTENSIONS[] = {
24
24
  };
25
25
 
26
26
  FerretConfig config = {
27
- 10, // default merge_factor
28
- 10, // default min_merge_docs
29
- INT_MAX, // default max_merge_docs
30
- 10000, // default max_field_length
31
- 128 // default term_index_interval
27
+ 10, /* default merge_factor */
28
+ 10, /* default min_merge_docs */
29
+ INT_MAX, /* default max_merge_docs */
30
+ 10000, /* default max_field_length */
31
+ 128 /* default term_index_interval */
32
32
  };
33
33
 
34
34
  /***************************************************************************
@@ -47,33 +47,32 @@ int co_eq(const void *key1, const void *key2)
47
47
  return (key1 == key2);
48
48
  }
49
49
 
50
- void co_destroy(void *p)
50
+ void co_destroy(CacheObject *self)
51
51
  {
52
- CacheObject *co = (CacheObject *)p;
53
- h_rem(co->ref_tab1, co->ref2, false);
54
- h_rem(co->ref_tab2, co->ref1, false);
55
- co->destroy(co->obj);
56
- free(co);
52
+ h_rem(self->ref_tab1, self->ref2, false);
53
+ h_rem(self->ref_tab2, self->ref1, false);
54
+ self->destroy(self->obj);
55
+ free(self);
57
56
  }
58
57
 
59
58
  CacheObject *co_create(HshTable *ref_tab1, HshTable *ref_tab2,
60
- void *ref1, void *ref2, void (*destroy)(void *p), void *obj)
59
+ void *ref1, void *ref2, free_ft destroy, void *obj)
61
60
  {
62
- CacheObject *co = ALLOC(CacheObject);
63
- h_set(ref_tab1, ref2, co);
64
- h_set(ref_tab2, ref1, co);
65
- co->ref_tab1 = ref_tab1;
66
- co->ref_tab2 = ref_tab2;
67
- co->ref1 = ref1;
68
- co->ref2 = ref2;
69
- co->destroy = destroy;
70
- co->obj = obj;
71
- return co;
61
+ CacheObject *self = ALLOC(CacheObject);
62
+ h_set(ref_tab1, ref2, self);
63
+ h_set(ref_tab2, ref1, self);
64
+ self->ref_tab1 = ref_tab1;
65
+ self->ref_tab2 = ref_tab2;
66
+ self->ref1 = ref1;
67
+ self->ref2 = ref2;
68
+ self->destroy = destroy;
69
+ self->obj = obj;
70
+ return self;
72
71
  }
73
72
 
74
73
  HshTable *co_hsh_create()
75
74
  {
76
- return h_new(&co_hash, &co_eq, NULL, &co_destroy);
75
+ return h_new(&co_hash, &co_eq, (free_ft)NULL, (free_ft)&co_destroy);
77
76
  }
78
77
 
79
78
  /***************************************************************************
@@ -84,39 +83,38 @@ HshTable *co_hsh_create()
84
83
 
85
84
  Posting *p_create(Term *term, int position, TVOffsetInfo *offset)
86
85
  {
87
- Posting *p = ALLOC(Posting);
88
- p->freq = 1;
89
- p->size = 1;
90
- p->term = term;
91
- p->positions = ALLOC(int);
92
- p->positions[0] = position;
93
- p->offsets = ALLOC(TVOffsetInfo *);
94
- p->offsets[0] = offset;
95
- return p;
86
+ Posting *self = ALLOC(Posting);
87
+ self->freq = 1;
88
+ self->size = 1;
89
+ self->term = term;
90
+ self->positions = ALLOC(int);
91
+ self->positions[0] = position;
92
+ self->offsets = ALLOC(TVOffsetInfo *);
93
+ self->offsets[0] = offset;
94
+ return self;
96
95
  }
97
96
 
98
- void p_destroy(void *p)
97
+ void p_destroy(Posting *self)
99
98
  {
100
- // the positions and offsets will be put in a TVTerm so no need to free
99
+ /* the positions and offsets will be put in a TVTerm so no need to free */
101
100
  int i;
102
- Posting *post = (Posting *)p;
103
- free(post->positions);
104
- for (i = 0; i < post->freq; i++)
105
- tvoi_destroy(post->offsets[i]);
106
- free(post->offsets);
107
- free(p);
101
+ free(self->positions);
102
+ for (i = 0; i < self->freq; i++)
103
+ tvoi_destroy(self->offsets[i]);
104
+ free(self->offsets);
105
+ free(self);
108
106
  }
109
107
 
110
- void p_add_occurance(Posting *p, int position, TVOffsetInfo *offset)
108
+ void p_add_occurance(Posting *self, int position, TVOffsetInfo *offset)
111
109
  {
112
- if (p->freq >= p->size) {
113
- p->size *= 2;
114
- REALLOC_N(p->positions, int, p->size);
115
- REALLOC_N(p->offsets, TVOffsetInfo *, p->size);
110
+ if (self->freq >= self->size) {
111
+ self->size *= 2;
112
+ REALLOC_N(self->positions, int, self->size);
113
+ REALLOC_N(self->offsets, TVOffsetInfo *, self->size);
116
114
  }
117
- p->positions[p->freq] = position;
118
- p->offsets[p->freq] = offset;
119
- p->freq++;
115
+ self->positions[self->freq] = position;
116
+ self->offsets[self->freq] = offset;
117
+ self->freq++;
120
118
  }
121
119
 
122
120
  inline int p_cmp(const void *const p1, const void *const p2)
@@ -137,47 +135,49 @@ DocumentWriter *dw_open(Store *store,
137
135
  int max_field_length,
138
136
  int term_index_interval)
139
137
  {
140
- DocumentWriter *dw = ALLOC(DocumentWriter);
141
- dw->store = store;
142
- dw->analyzer = analyzer;
143
- dw->similarity = similarity;
144
- dw->fis = NULL;
145
- dw->postingtable = h_new(&term_hash, &term_eq, &term_destroy, &p_destroy);
146
- dw->max_field_length = max_field_length;
147
- dw->term_index_interval = term_index_interval;
148
- return dw;
138
+ DocumentWriter *self = ALLOC(DocumentWriter);
139
+ self->store = store;
140
+ self->analyzer = analyzer;
141
+ self->similarity = similarity;
142
+ self->fis = NULL;
143
+ self->postingtable = h_new(&term_hash, &term_eq,
144
+ (free_ft)&term_destroy,
145
+ (free_ft)&p_destroy);
146
+ self->max_field_length = max_field_length;
147
+ self->term_index_interval = term_index_interval;
148
+ return self;
149
149
  }
150
150
 
151
- void dw_close(DocumentWriter *dw)
151
+ void dw_close(DocumentWriter *self)
152
152
  {
153
- if (dw->fis) fis_destroy(dw->fis);
154
- h_destroy(dw->postingtable);
155
- free(dw);
153
+ if (self->fis) fis_destroy(self->fis);
154
+ h_destroy(self->postingtable);
155
+ free(self);
156
156
  }
157
157
 
158
- void dw_add_position(DocumentWriter *dw, char *field, char *text,
158
+ void dw_add_position(DocumentWriter *self, char *field, char *text,
159
159
  int position, TVOffsetInfo *offset)
160
160
  {
161
161
  Term termbuf = {field, text}, *term;
162
- Posting *p = (Posting *)h_get(dw->postingtable, &termbuf);
162
+ Posting *p = (Posting *)h_get(self->postingtable, &termbuf);
163
163
 
164
- if (p) { // word seen before
165
- // double the size of posting to make room for more posts.
164
+ if (p) { /* word seen before */
166
165
  if (p->freq >= p->size) {
166
+ /* double size of posting to make room for more posts. */
167
167
  p->size <<= 1;
168
168
  REALLOC_N(p->positions, int, p->size);
169
169
  p->offsets = REALLOC_N(p->offsets, TVOffsetInfo *, p->size);
170
170
  }
171
- p->positions[p->freq] = position; // add new position
172
- p->offsets[p->freq] = offset; // add new position
173
- p->freq++; // update frequency
174
- } else { // word not seen before
171
+ p->positions[p->freq] = position; /* add new position */
172
+ p->offsets[p->freq] = offset; /* add new offset */
173
+ p->freq++; /* update frequency */
174
+ } else { /* word not seen before */
175
175
  term = term_create(field, text);
176
- h_set(dw->postingtable, term, p_create(term, position, offset));
176
+ h_set(self->postingtable, term, p_create(term, position, offset));
177
177
  }
178
178
  }
179
179
 
180
- void dw_invert_doc(DocumentWriter *dw, Document *doc)
180
+ void dw_invert_doc(DocumentWriter *self, Document *doc)
181
181
  {
182
182
  int i;
183
183
  int dfcnt = doc->dfcnt;
@@ -191,69 +191,74 @@ void dw_invert_doc(DocumentWriter *dw, Document *doc)
191
191
  for (i = 0; i < dfcnt; i++) {
192
192
  field = fields[i];
193
193
  field_name = field->name;
194
- fi = ((FieldInfo *)ht_get(dw->fis->by_name, field_name));
194
+ fi = ((FieldInfo *)ht_get(self->fis->by_name, field_name));
195
195
  field_number = fi->number;
196
196
 
197
- length = dw->field_lengths[field_number];
198
- offset = dw->field_offsets[field_number];
199
- position = dw->field_positions[field_number];
197
+ length = self->field_lengths[field_number];
198
+ offset = self->field_offsets[field_number];
199
+ position = self->field_positions[field_number];
200
200
 
201
201
  if (fi->is_indexed) {
202
- if (!field->is_tokenized) {// un-tokenized field
202
+ if (!field->is_tokenized) { /* un-tokenized field */
203
203
  text = field->data;
204
- slen = strlen(text);
204
+ slen = (int)strlen(text);
205
205
  if (fi->store_offset) {
206
- dw_add_position(dw, field_name, text, position,
206
+ dw_add_position(self, field_name, text, position,
207
207
  tvoi_create(offset, offset+slen));
208
208
  } else {
209
- dw_add_position(dw, field_name, text, position, NULL);
209
+ dw_add_position(self, field_name, text, position, NULL);
210
210
  }
211
211
  offset += slen;
212
212
  length++;
213
213
  } else {
214
214
 
215
- // Tokenize field and add to posting_table
216
- stream = a_get_ts(dw->analyzer, field_name, field->data);
215
+ /* Tokenize field and add to posting_table */
216
+ stream = a_get_ts(self->analyzer, field_name, field->data);
217
217
 
218
218
  while ((token = ts_next(stream)) != NULL) {
219
219
  position += (token->pos_inc - 1);
220
220
 
221
221
  if (fi->store_offset) {
222
- dw_add_position(dw,
222
+ dw_add_position(self,
223
223
  field_name,
224
224
  token->text,
225
225
  position,
226
226
  tvoi_create(offset + token->start, offset + token->end));
227
227
  position++;
228
228
  } else {
229
- dw_add_position(dw, field_name, token->text, position, NULL);
229
+ dw_add_position(self, field_name, token->text, position, NULL);
230
230
  position++;
231
231
  }
232
232
 
233
233
  length++;
234
- // stop if we reach the max field length
235
- if (length > dw->max_field_length)
234
+ /* stop if we reach the max field length */
235
+ if (length > self->max_field_length) {
236
236
  break;
237
+ }
237
238
  }
238
239
 
239
- if (token)
240
+ if (token) {
240
241
  offset += token->end + 1;
242
+ }
241
243
  }
242
- dw->field_lengths[field_number] = length;
243
- dw->field_offsets[field_number] = offset;
244
- dw->field_positions[field_number] = position;
245
- dw->field_boosts[field_number] *= field->boost;
244
+ self->field_lengths[field_number] = length;
245
+ self->field_offsets[field_number] = offset;
246
+ self->field_positions[field_number] = position;
247
+ self->field_boosts[field_number] *= field->boost;
246
248
  }
247
249
  }
248
250
  }
249
251
 
250
- Posting **dw_sort_posting_table(DocumentWriter *dw)
252
+ Posting **dw_sort_posting_table(DocumentWriter *self)
251
253
  {
252
- HshTable *ht = dw->postingtable;
253
- int i;
254
- dw->pcnt = i = ht->used;
255
- Posting **postings = ALLOC_N(Posting *, i);
254
+ HshTable *ht = self->postingtable;
256
255
  HshEntry *he = ht->table;
256
+ Posting **postings;
257
+ int i;
258
+
259
+ self->pcnt = i = ht->used;
260
+ postings = ALLOC_N(Posting *, i);
261
+
257
262
  while (i > 0) {
258
263
  if (he->value != NULL) {
259
264
  i--;
@@ -261,16 +266,16 @@ Posting **dw_sort_posting_table(DocumentWriter *dw)
261
266
  }
262
267
  he++;
263
268
  }
264
- qsort(postings, dw->pcnt, sizeof(Posting *), &p_cmp);
269
+ qsort(postings, self->pcnt, sizeof(Posting *), &p_cmp);
265
270
  return postings;
266
271
  }
267
272
 
268
- void dw_write_postings(DocumentWriter *dw, Posting **postings, char *segment)
273
+ void dw_write_postings(DocumentWriter *self, Posting **postings, char *segment)
269
274
  {
270
275
  OutStream * volatile freq_out = NULL, * volatile prox_out = NULL;
271
276
  TermInfosWriter * volatile tiw = NULL;
272
277
  TermVectorsWriter * volatile tvw = NULL;
273
- Store *store = dw->store;
278
+ Store *store = self->store;
274
279
  TermInfo * volatile ti = NULL;
275
280
  Posting *posting;
276
281
  int i, j, posting_freq, position, last_position;
@@ -278,31 +283,31 @@ void dw_write_postings(DocumentWriter *dw, Posting **postings, char *segment)
278
283
  strcpy(fname, segment);
279
284
 
280
285
  TRY
281
- //open files for inverse index storage
286
+ /* open files for inverse index storage */
282
287
  sprintf(fname, "%s.frq", segment);
283
288
  freq_out = store->create_output(store, fname);
284
289
  sprintf(fname, "%s.prx", segment);
285
290
  prox_out = store->create_output(store, fname);
286
- tiw = tiw_open(store, segment, dw->fis, dw->term_index_interval);
291
+ tiw = tiw_open(store, segment, self->fis, self->term_index_interval);
287
292
  ti = ti_create(0, 0, 0, 0);
288
293
 
289
- for (i = 0; i < dw->pcnt; i++) {
294
+ for (i = 0; i < self->pcnt; i++) {
290
295
  posting = postings[i];
291
296
 
292
- // add an entry to the dictionary with pointers to prox and freq_out files
297
+ /* add an entry to dictionary with pointers to prox and freq_out files */
293
298
  ti_set(ti, 1, os_pos(freq_out), os_pos(prox_out), -1);
294
299
  tiw_add(tiw, posting->term, ti);
295
300
 
296
- // add an entry to the freq_out file
301
+ /* add an entry to the freq_out file */
297
302
  posting_freq = posting->freq;
298
- if (posting_freq == 1) { // optimize freq=1
299
- os_write_vint(freq_out, 1); // set low bit of doc num.
303
+ if (posting_freq == 1) { /* optimize freq=1 */
304
+ os_write_vint(freq_out, 1); /* set low bit of doc num */
300
305
  } else {
301
- os_write_vint(freq_out, 0); // the doc number
302
- os_write_vint(freq_out, posting_freq); // frequency in doc
306
+ os_write_vint(freq_out, 0); /* the doc number */
307
+ os_write_vint(freq_out, posting_freq); /* frequency in doc */
303
308
  }
304
309
 
305
- last_position = 0; // write positions
310
+ last_position = 0; /* write positions */
306
311
 
307
312
  for (j = 0; j < posting_freq; j++) {
308
313
  position = posting->positions[j];
@@ -310,16 +315,16 @@ void dw_write_postings(DocumentWriter *dw, Posting **postings, char *segment)
310
315
  last_position = position;
311
316
  }
312
317
 
313
- // check to see if we switched to a new field
318
+ /* check to see if we switched to a new field */
314
319
  term_field = posting->term->field;
315
320
  if (curr_field != term_field) {
316
321
  FieldInfo *fi;
317
- // changing field - see if there is something to save
322
+ /* changing field - see if there is something to save */
318
323
  curr_field = term_field;
319
- fi = (FieldInfo *)ht_get(dw->fis->by_name, curr_field);
324
+ fi = (FieldInfo *)ht_get(self->fis->by_name, curr_field);
320
325
  if (fi->store_tv) {
321
326
  if (tvw == NULL) {
322
- tvw = tvw_open(store, segment, dw->fis);
327
+ tvw = tvw_open(store, segment, self->fis);
323
328
  tvw_open_doc(tvw);
324
329
  }
325
330
  tvw_open_field(tvw, curr_field);
@@ -328,7 +333,7 @@ void dw_write_postings(DocumentWriter *dw, Posting **postings, char *segment)
328
333
  tvw_close_field(tvw);
329
334
  }
330
335
  }
331
- // tvw->curr_field != NULL implies field is still open
336
+ /* tvw->curr_field != NULL implies field is still open */
332
337
  if (tvw != NULL && tvw->curr_field != NULL) {
333
338
  tvw_add_term(tvw, posting->term->text, posting_freq, posting->positions, posting->offsets);
334
339
  }
@@ -338,8 +343,8 @@ void dw_write_postings(DocumentWriter *dw, Posting **postings, char *segment)
338
343
  tvw_close_doc(tvw);
339
344
  tvw_close(tvw);
340
345
  }
341
- // make an effort to close all streams we can but remember and re-raise
342
- // the last exception encountered in this process
346
+ /* make an effort to close all streams we can but remember and re-raise
347
+ * the last exception encountered in this process */
343
348
  if (freq_out) os_close(freq_out);
344
349
  if (prox_out) os_close(prox_out);
345
350
  if (tiw) tiw_close(tiw);
@@ -347,24 +352,25 @@ void dw_write_postings(DocumentWriter *dw, Posting **postings, char *segment)
347
352
  XENDTRY
348
353
  }
349
354
 
350
- void dw_write_norms(DocumentWriter *dw, char *segment)
355
+ void dw_write_norms(DocumentWriter *self, char *segment)
351
356
  {
352
357
  int i;
353
358
  float norm;
354
359
  OutStream *norms_out;
355
360
  char fname[SEGMENT_NAME_MAX_LENGTH];
356
- FieldInfos *fis = dw->fis;
361
+ FieldInfos *fis = self->fis;
357
362
  FieldInfo *fi;
358
363
 
359
364
  for (i = 0; i < fis->fcnt; i++) {
360
365
  fi = fis->by_number[i];
361
366
 
362
367
  if (fi->is_indexed && !fi->omit_norms) {
363
- norm = dw->field_boosts[i] * sim_length_norm(dw->similarity, fi->name, dw->field_lengths[i]);
368
+ norm = self->field_boosts[i] *
369
+ sim_length_norm(self->similarity, fi->name, self->field_lengths[i]);
364
370
  sprintf(fname, "%s.f%d", segment, i);
365
- norms_out = dw->store->create_output(dw->store, fname);
371
+ norms_out = self->store->create_output(self->store, fname);
366
372
  TRY
367
- os_write_byte(norms_out, sim_encode_norm(dw->similarity, norm));
373
+ os_write_byte(norms_out, sim_encode_norm(self->similarity, norm));
368
374
  XFINALLY
369
375
  os_close(norms_out);
370
376
  XENDTRY
@@ -372,49 +378,54 @@ void dw_write_norms(DocumentWriter *dw, char *segment)
372
378
  }
373
379
  }
374
380
 
375
- void dw_add_doc(DocumentWriter *dw, char *segment, Document *doc)
381
+ void dw_add_doc(DocumentWriter *self, char *segment, Document *doc)
376
382
  {
383
+ Posting **postings;
384
+ FieldsWriter *fw;
377
385
  int i;
378
- // write field names
379
- dw->fis = fis_create();
380
- fis_add_doc(dw->fis, doc);
381
- fis_write(dw->fis, dw->store, segment, ".fnm");
382
386
 
383
- // write field values
384
- FieldsWriter *fw = fw_open(dw->store, segment, dw->fis);
387
+ /* write field names */
388
+ self->fis = fis_create();
389
+ fis_add_doc(self->fis, doc);
390
+ fis_write(self->fis, self->store, segment, ".fnm");
391
+
392
+ /* write field values */
393
+ fw = fw_open(self->store, segment, self->fis);
385
394
  TRY
386
395
  fw_add_doc(fw, doc);
387
396
  XFINALLY
388
397
  fw_close(fw);
389
398
  XENDTRY
390
399
 
391
- // invert doc into posting_table
392
- h_clear(dw->postingtable); // clear posting_table
400
+ /* invert doc into posting_table */
393
401
 
394
- dw->field_boosts = ALLOC_N(float, dw->fis->fcnt);
395
- dw->field_lengths = ALLOC_AND_ZERO_N(int, dw->fis->fcnt);
396
- dw->field_offsets = ALLOC_AND_ZERO_N(int, dw->fis->fcnt);
397
- dw->field_positions = ALLOC_AND_ZERO_N(int, dw->fis->fcnt);
402
+ h_clear(self->postingtable); /* clear posting_table */
398
403
 
399
- for (i = 0; i < dw->fis->fcnt; i++)
400
- dw->field_boosts[i] = doc->boost;
404
+ self->field_boosts = ALLOC_N(float, self->fis->fcnt);
405
+ self->field_lengths = ALLOC_AND_ZERO_N(int, self->fis->fcnt);
406
+ self->field_offsets = ALLOC_AND_ZERO_N(int, self->fis->fcnt);
407
+ self->field_positions = ALLOC_AND_ZERO_N(int, self->fis->fcnt);
401
408
 
402
- dw_invert_doc(dw, doc);
409
+ for (i = 0; i < self->fis->fcnt; i++) {
410
+ self->field_boosts[i] = doc->boost;
411
+ }
412
+
413
+ dw_invert_doc(self, doc);
403
414
 
404
- // sort posting_table into an array
405
- Posting **postings = dw_sort_posting_table(dw);
415
+ /* sort posting_table into an array */
416
+ postings = dw_sort_posting_table(self);
406
417
 
407
- // write postings
408
- dw_write_postings(dw, postings, segment);
418
+ /* write postings */
419
+ dw_write_postings(self, postings, segment);
409
420
  free(postings);
410
421
 
411
- // write norms of indexed fields
412
- dw_write_norms(dw, segment);
422
+ /* write norms of indexed fields */
423
+ dw_write_norms(self, segment);
413
424
 
414
- free(dw->field_boosts);
415
- free(dw->field_lengths);
416
- free(dw->field_offsets);
417
- free(dw->field_positions);
425
+ free(self->field_boosts);
426
+ free(self->field_lengths);
427
+ free(self->field_offsets);
428
+ free(self->field_positions);
418
429
  }
419
430
 
420
431
  /****************************************************************************
@@ -432,9 +443,8 @@ SegmentInfo *si_create(char *name, int doc_cnt, Store *store)
432
443
  return si;
433
444
  }
434
445
 
435
- void si_destroy(void *p)
446
+ void si_destroy(SegmentInfo *si)
436
447
  {
437
- SegmentInfo *si = (SegmentInfo *)p;
438
448
  free(si->name);
439
449
  free(si);
440
450
  }
@@ -501,21 +511,19 @@ SegmentInfos *sis_create()
501
511
  return sis;
502
512
  }
503
513
 
504
- void sis_destroy_not_infos(void *p)
514
+ void sis_destroy_not_infos(SegmentInfos *sis)
505
515
  {
506
- SegmentInfos *sis = (SegmentInfos *)p;
507
516
  free(sis->segs);
508
- free(p);
517
+ free(sis);
509
518
  }
510
519
 
511
- void sis_destroy(void *p)
520
+ void sis_destroy(SegmentInfos *sis)
512
521
  {
513
522
  int i;
514
- SegmentInfos *sis = (SegmentInfos *)p;
515
523
  for (i = 0; i < sis->scnt; i++)
516
524
  si_destroy(sis->segs[i]);
517
525
  free(sis->segs);
518
- free(p);
526
+ free(sis);
519
527
  }
520
528
 
521
529
  void sis_add_si(SegmentInfos *sis, SegmentInfo *si)
@@ -533,8 +541,9 @@ void sis_del_at(SegmentInfos *sis, int at)
533
541
  int i;
534
542
  si_destroy(sis->segs[at]);
535
543
  sis->scnt--;
536
- for (i = at; i < sis->scnt; i++)
544
+ for (i = at; i < sis->scnt; i++) {
537
545
  sis->segs[i] = sis->segs[i+1];
546
+ }
538
547
  }
539
548
 
540
549
  void sis_del_from_to(SegmentInfos *sis, int from, int to)
@@ -561,24 +570,25 @@ void sis_clear(SegmentInfos *sis)
561
570
  void sis_read(SegmentInfos *sis, Store *store)
562
571
  {
563
572
  int doc_cnt;
573
+ int seg_count;
574
+ int i;
564
575
  char *name;
565
576
  InStream *is = store->open_input(store, SEGMENT_FILENAME);
566
577
 
567
578
  TRY
568
579
 
569
580
  sis->format = is_read_int(is);
570
- if (sis->format < 0) { // file contains explicit format info
571
- // check that it is a format we can understand
581
+ if (sis->format < 0) { /* file contains explicit format info */
582
+ /* check that it is a format we can understand */
572
583
  if (sis->format < FORMAT)
573
- RAISE(ERROR, FORMAT_VERSION_ERROR_MSG);
574
- sis->version = is_read_long(is);
575
- sis->counter = is_read_int(is);
576
- } else { // file is in old format without explicit format info
584
+ RAISE(EXCEPTION, FORMAT_VERSION_ERROR_MSG);
585
+ sis->version = (uint)is_read_long(is);
586
+ sis->counter = (int)is_read_int(is);
587
+ } else { /* file is in old format without explicit format info */
577
588
  sis->counter = sis->format;
578
589
  }
579
590
 
580
- int seg_count = is_read_int(is);
581
- int i;
591
+ seg_count = is_read_int(is);
582
592
  for (i = 0; i < seg_count; i++) {
583
593
  name = is_read_string(is);
584
594
  doc_cnt = is_read_int(is);
@@ -586,11 +596,12 @@ void sis_read(SegmentInfos *sis, Store *store)
586
596
  }
587
597
 
588
598
  if (sis->format >= 0) {
589
- // in old format the version number may be at the end of the file
590
- if (is_pos(is) >= is_length(is))
591
- sis->version = 0; // old file format without version number
592
- else
593
- sis->version = is_read_long(is); // read version
599
+ /* in old format the version number may be at the end of the file */
600
+ if (is_pos(is) >= is_length(is)) {
601
+ sis->version = 0; /* old file format without version number */
602
+ } else {
603
+ sis->version = (int)is_read_long(is); /* read version */
604
+ }
594
605
  }
595
606
  XFINALLY
596
607
  is_close(is);
@@ -604,7 +615,7 @@ void sis_write(SegmentInfos *sis, Store *store)
604
615
  OutStream *os = store->create_output(store, TEMPORARY_SEGMENT_FILENAME);
605
616
  TRY
606
617
  os_write_int(os, FORMAT);
607
- os_write_long(os, ++(sis->version)); // every write changes the index
618
+ os_write_long(os, ++(sis->version)); /* every write changes the index */
608
619
  os_write_int(os, sis->counter);
609
620
  os_write_int(os, sis->scnt);
610
621
  for (i = 0; i < sis->scnt; i++) {
@@ -617,24 +628,27 @@ void sis_write(SegmentInfos *sis, Store *store)
617
628
  os_close(os);
618
629
  XENDTRY
619
630
 
620
- //install new segment info
631
+ /* install new segment info */
621
632
  store->rename(store, TEMPORARY_SEGMENT_FILENAME, SEGMENT_FILENAME);
622
633
  }
623
634
 
624
635
  int sis_read_current_version(Store *store)
625
636
  {
626
- if (!store->exists(store, SEGMENT_FILENAME))
627
- return 0;
628
- InStream *is = store->open_input(store, SEGMENT_FILENAME);
637
+ InStream *is;
638
+ SegmentInfos *sis;
629
639
  int format = 0;
630
640
  int version = 0;
631
641
 
642
+ if (!store->exists(store, SEGMENT_FILENAME))
643
+ return 0;
644
+ is = store->open_input(store, SEGMENT_FILENAME);
645
+
632
646
  TRY
633
647
  format = is_read_int(is);
634
648
  if (format < 0) {
635
649
  if (format < FORMAT)
636
- RAISE(ERROR, FORMAT_VERSION_ERROR_MSG);
637
- version = is_read_long(is);
650
+ RAISE(EXCEPTION, FORMAT_VERSION_ERROR_MSG);
651
+ version = (int)is_read_long(is);
638
652
  }
639
653
  XFINALLY
640
654
  is_close(is);
@@ -643,11 +657,11 @@ int sis_read_current_version(Store *store)
643
657
  if (format < 0)
644
658
  return version;
645
659
 
646
- // We cannot be sure about the format of the file.
647
- // Therefore we have to read the whole file and cannot simply
648
- // seek to the version entry.
660
+ /* We cannot be sure about the format of the file.
661
+ * Therefore we have to read the whole file and cannot simply
662
+ * seek to the version entry. */
649
663
 
650
- SegmentInfos *sis = sis_create();
664
+ sis = sis_create();
651
665
  sis_read(sis, store);
652
666
  version = sis->version;
653
667
  sis_destroy(sis);
@@ -660,8 +674,10 @@ int sis_read_current_version(Store *store)
660
674
  *
661
675
  ****************************************************************************/
662
676
 
663
- IndexWriter *iw_open(Store *store, Analyzer *analyzer,
664
- bool create, bool close_store, bool close_analyzer)
677
+ /**
678
+ * Deletes the analyzer by default but leaves the store by default
679
+ */
680
+ IndexWriter *iw_open(Store *store, Analyzer *analyzer, bool create)
665
681
  {
666
682
  IndexWriter *iw = ALLOC(IndexWriter);
667
683
  if (create)
@@ -674,15 +690,14 @@ IndexWriter *iw_open(Store *store, Analyzer *analyzer,
674
690
  iw->term_index_interval = config.term_index_interval;
675
691
  iw->use_compound_file = true;
676
692
  iw->store = store;
677
- iw->close_store = close_store;
678
- iw->close_analyzer = close_analyzer;
693
+ ref(store);
679
694
  iw->analyzer = analyzer;
680
695
  iw->sis = sis_create();
681
696
  iw->similarity = sim_create_default();
682
697
  iw->ram_store = open_ram_store();
683
698
 
684
699
  mutex_lock(&store->mutex);
685
- // keep the write_lock obtained until the IndexWriter is closed.
700
+ /* keep the write_lock obtained until the IndexWriter is closed. */
686
701
  iw->write_lock = store->open_lock(store, WRITE_LOCK_NAME);
687
702
  if (!iw->write_lock->obtain(iw->write_lock)) {
688
703
  RAISE(STATE_ERROR, WRITE_LOCK_ERROR_MSG);
@@ -695,7 +710,7 @@ IndexWriter *iw_open(Store *store, Analyzer *analyzer,
695
710
  RAISE(STATE_ERROR, COMMIT_LOCK_ERROR_MSG);
696
711
  }
697
712
  TRY
698
- // commit the index
713
+ /* commit the index */
699
714
  store->clear(store);
700
715
  sis_write(iw->sis, store);
701
716
  XFINALLY
@@ -714,8 +729,9 @@ const char base36_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
714
729
  char *new_segment_name(int counter)
715
730
  {
716
731
  char buf[SEGMENT_NAME_MAX_LENGTH];
717
- buf[SEGMENT_NAME_MAX_LENGTH - 1] = '\0';
718
732
  int i;
733
+
734
+ buf[SEGMENT_NAME_MAX_LENGTH - 1] = '\0';
719
735
  for (i = SEGMENT_NAME_MAX_LENGTH - 2; ; i--) {
720
736
  buf[i] = base36_digitmap[counter%36];
721
737
  counter /= 36;
@@ -749,8 +765,8 @@ void delete_files(Array *file_names, Store *store)
749
765
  Array *sr_file_names(IndexReader *ir);
750
766
  void iw_delete_segments(IndexWriter *iw, IndexReader **segment_readers, int del_cnt)
751
767
  {
752
- // The java version keeps a record of files that it couldn't delete. This
753
- // shouldn't be a problem on linux I hope.
768
+ /* The java version keeps a record of files that it couldn't delete. This
769
+ * shouldn't be a problem on linux I hope. */
754
770
  IndexReader *ir;
755
771
  int i;
756
772
  for (i = 0; i < del_cnt; i++) {
@@ -761,22 +777,25 @@ void iw_delete_segments(IndexWriter *iw, IndexReader **segment_readers, int del_
761
777
 
762
778
  void make_compound_file(IndexWriter *iw, char *merged_name, SegmentMerger *merger)
763
779
  {
780
+ Array *files_to_delete;
781
+ Lock *commit_lock;
764
782
  char merged_tmp[SEGMENT_NAME_MAX_LENGTH], merged_cfs[SEGMENT_NAME_MAX_LENGTH];
765
783
 
766
784
  mutex_lock(&iw->store->mutex);
767
785
  sprintf(merged_tmp, "%s.tmp", merged_name);
768
786
  sprintf(merged_cfs, "%s.cfs", merged_name);
769
787
 
770
- Array *files_to_delete = sm_create_compound_file(merger, merged_tmp);
771
- Lock *commit_lock = iw->store->open_lock(iw->store, COMMIT_LOCK_NAME);
788
+ files_to_delete = sm_create_compound_file(merger, merged_tmp);
789
+ commit_lock = iw->store->open_lock(iw->store, COMMIT_LOCK_NAME);
772
790
 
773
791
  if (!commit_lock->obtain(commit_lock)) {
774
792
  RAISE(STATE_ERROR, COMMIT_LOCK_ERROR_MSG);
775
793
  }
776
794
 
777
- // make compound file visible for SegmentReaders
795
+ /* make compound file visible for SegmentReaders */
778
796
  iw->store->rename(iw->store, merged_tmp, merged_cfs);
779
- // delete now unused files of segment
797
+
798
+ /* delete now unused files of segment */
780
799
  delete_files(files_to_delete, iw->store);
781
800
 
782
801
  commit_lock->release(commit_lock);
@@ -787,7 +806,9 @@ void make_compound_file(IndexWriter *iw, char *merged_name, SegmentMerger *merge
787
806
  void iw_merge_segments_with_max(IndexWriter *iw, int min_segment, int max_segment)
788
807
  {
789
808
  int i;
790
- IndexReader *segments_to_delete[max_segment - min_segment];
809
+ int merged_doc_count;
810
+ Lock *commit_lock;
811
+ IndexReader **segments_to_delete = ALLOC_N(IndexReader *, max_segment - min_segment);
791
812
  int del_cnt = 0;
792
813
 
793
814
  char *merged_name = new_segment_name(iw->sis->counter++);
@@ -797,31 +818,31 @@ void iw_merge_segments_with_max(IndexWriter *iw, int min_segment, int max_segmen
797
818
 
798
819
 
799
820
  for (i = min_segment; i < max_segment; i++) {
800
- reader = sr_open(iw->sis, i, false, false);
821
+ reader = sr_open(iw->sis, i, false);
801
822
  sm_add(merger, reader);
802
- if ((reader->store == iw->store) || // if we own the directory
823
+ if ((reader->store == iw->store) || /* if we own the directory */
803
824
  (reader->store == iw->ram_store)) {
804
- segments_to_delete[del_cnt++] = reader; // queue segment for deletion
825
+ segments_to_delete[del_cnt++] = reader; /* queue segment for deletion */
805
826
  }
806
827
  }
807
828
 
808
- int merged_doc_count = sm_merge(merger);
829
+ merged_doc_count = sm_merge(merger);
809
830
 
810
831
  sis_del_from_to(iw->sis, min_segment, max_segment);
811
832
 
812
833
  sis_add_si(iw->sis, si_create(merged_name, merged_doc_count, iw->store));
813
834
 
814
- // close readers before we attempt to delete now-obsolete segments
835
+ /* close readers before we attempt to delete now-obsolete segments */
815
836
 
816
837
  mutex_lock(&iw->store->mutex);
817
- Lock *commit_lock = iw->store->open_lock(iw->store, COMMIT_LOCK_NAME);
838
+ commit_lock = iw->store->open_lock(iw->store, COMMIT_LOCK_NAME);
818
839
  if (!commit_lock->obtain(commit_lock)) {
819
840
  RAISE(STATE_ERROR, COMMIT_LOCK_ERROR_MSG);
820
841
  }
821
- // commit the index
842
+ /* commit the index */
822
843
  sis_write(iw->sis, iw->store);
823
844
  iw_delete_segments(iw, segments_to_delete, del_cnt);
824
- //
845
+
825
846
  commit_lock->release(commit_lock);
826
847
  iw->store->close_lock(commit_lock);
827
848
  mutex_unlock(&iw->store->mutex);
@@ -830,6 +851,7 @@ void iw_merge_segments_with_max(IndexWriter *iw, int min_segment, int max_segmen
830
851
  make_compound_file(iw, merged_name, merger);
831
852
  }
832
853
 
854
+ free(segments_to_delete);
833
855
  sm_destroy(merger);
834
856
  }
835
857
 
@@ -845,23 +867,25 @@ void iw_maybe_merge_segments(IndexWriter *iw)
845
867
  SegmentInfo *si;
846
868
 
847
869
  while (target_merge_docs <= iw->max_merge_docs) {
848
- // find segments smaller than current target size
870
+ /* find segments smaller than current target size */
849
871
  min_segment = iw->sis->scnt - 1;
850
872
  merge_docs = 0;
851
873
  while (min_segment >= 0) {
852
874
  si = iw->sis->segs[min_segment];
853
- if (si->doc_cnt >= target_merge_docs)
875
+ if (si->doc_cnt >= target_merge_docs) {
854
876
  break;
877
+ }
855
878
  merge_docs += si->doc_cnt;
856
879
  min_segment -= 1;
857
880
  }
858
881
 
859
- if (merge_docs >= target_merge_docs) // found a merge to do
882
+ if (merge_docs >= target_merge_docs) { /* found a merge to do */
860
883
  iw_merge_segments(iw, min_segment + 1);
861
- else
884
+ } else {
862
885
  break;
886
+ }
863
887
 
864
- target_merge_docs *= iw->merge_factor; // increase target size
888
+ target_merge_docs *= iw->merge_factor; /* increase target size */
865
889
  }
866
890
  }
867
891
 
@@ -883,12 +907,14 @@ void iw_flush_ram_segments(IndexWriter *iw)
883
907
  * that wasn't the ram segment. But if it fit's in with the merge
884
908
  * factor, why not merge it. Otherwise we leave it and increment min_seg
885
909
  */
886
- if (min_segment < 0 || // add one FS segment?
887
- (doc_count + segs[min_segment]->doc_cnt) > iw->merge_factor ||
888
- (segs[iw->sis->scnt-1]->store != iw->ram_store))
910
+ if ((min_segment < 0) || /* add one FS segment? */
911
+ ((doc_count + segs[min_segment]->doc_cnt) > iw->merge_factor) ||
912
+ (segs[iw->sis->scnt - 1]->store != iw->ram_store)) {
889
913
  min_segment++;
890
- if (min_segment >= iw->sis->scnt)
914
+ }
915
+ if (min_segment >= iw->sis->scnt) {
891
916
  return;
917
+ }
892
918
  iw_merge_segments(iw, min_segment);
893
919
  }
894
920
 
@@ -937,17 +963,16 @@ void iw_close(IndexWriter *iw)
937
963
  {
938
964
  mutex_lock(&iw->mutex);
939
965
  iw_flush_ram_segments(iw);
940
- ram_close(iw->ram_store);
966
+ store_deref(iw->ram_store);
941
967
  sis_destroy(iw->sis);
942
968
 
943
969
  sim_destroy(iw->similarity);
944
- if (iw->close_analyzer) a_destroy(iw->analyzer);
970
+ a_deref(iw->analyzer);
945
971
 
946
972
  iw->write_lock->release(iw->write_lock);
947
973
  iw->store->close_lock(iw->write_lock);
948
974
 
949
- if (iw->close_store)
950
- store_close(iw->store);
975
+ store_deref(iw->store);
951
976
  mutex_destroy(&iw->mutex);
952
977
  free(iw);
953
978
  }
@@ -957,13 +982,13 @@ void iw_add_indexes(IndexWriter *iw, Store **stores, int cnt)
957
982
  int i, j, end, start;
958
983
 
959
984
  mutex_lock(&iw->mutex);
960
- iw_optimize_internal(iw); // start with zero or 1 seg
985
+ iw_optimize_internal(iw); /* start with zero or 1 seg */
961
986
 
962
987
  start = iw->sis->scnt;
963
988
 
964
989
  for (i = 0; i < cnt; i++) {
965
990
  Store *store = stores[i];
966
- SegmentInfos *sis = sis_create(); // read infos from dir
991
+ SegmentInfos *sis = sis_create(); /* read infos from dir */
967
992
  sis_read(sis, store);
968
993
 
969
994
  for (j = 0; j < sis->scnt; j++) {
@@ -973,7 +998,7 @@ void iw_add_indexes(IndexWriter *iw, Store **stores, int cnt)
973
998
  sis_destroy_not_infos(sis);
974
999
  }
975
1000
 
976
- // merge newly added segments in log(n) passes
1001
+ /* merge newly added segments in log(n) passes */
977
1002
  while (iw->sis->scnt > start + iw->merge_factor) {
978
1003
  for (i = start + 1; i < iw->sis->scnt; i++) {
979
1004
  end = MIN(iw->sis->scnt, i + iw->merge_factor);
@@ -983,7 +1008,7 @@ void iw_add_indexes(IndexWriter *iw, Store **stores, int cnt)
983
1008
  }
984
1009
  }
985
1010
 
986
- // final cleanup
1011
+ /* final cleanup */
987
1012
  iw_optimize_internal(iw);
988
1013
  mutex_unlock(&iw->mutex);
989
1014
  }
@@ -996,16 +1021,20 @@ void iw_add_readers(IndexWriter *iw, IndexReader **irs, int cnt)
996
1021
  {
997
1022
  IndexReader *ir = NULL;
998
1023
  int i, del_cnt = 0;
999
-
1024
+ int doc_count;
1025
+ char *merged_name;
1026
+ SegmentMerger *merger;
1027
+ Lock *commit_lock;
1028
+
1000
1029
  mutex_lock(&iw->mutex);
1001
- iw_optimize_internal(iw); // start with zero or 1 seg
1030
+ iw_optimize_internal(iw); /* start with zero or 1 seg */
1002
1031
 
1003
- char *merged_name = new_segment_name(iw->sis->counter++);
1032
+ merged_name = new_segment_name(iw->sis->counter++);
1004
1033
 
1005
- SegmentMerger *merger = sm_create(iw->store, merged_name, iw->term_index_interval);
1006
- merger->readers->free_elem = NULL; // don't close readers
1034
+ merger = sm_create(iw->store, merged_name, iw->term_index_interval);
1035
+ merger->readers->free_elem = NULL; /* don't close readers */
1007
1036
 
1008
- if (iw->sis->scnt == 1) {// add existing index, if any
1037
+ if (iw->sis->scnt == 1) { /* add existing index, if any */
1009
1038
  ir = sr_open_si(iw->sis->segs[0]);
1010
1039
  sm_add(merger, ir);
1011
1040
  del_cnt = 1;
@@ -1015,18 +1044,19 @@ void iw_add_readers(IndexWriter *iw, IndexReader **irs, int cnt)
1015
1044
  sm_add(merger, irs[i]);
1016
1045
  }
1017
1046
 
1018
- int doc_count = sm_merge(merger); // merge 'em
1047
+ doc_count = sm_merge(merger); /* merge 'em */
1019
1048
 
1020
- // pop old infos and add new ones.
1049
+ /* pop old infos and add new ones. */
1021
1050
  sis_clear(iw->sis);
1022
1051
  sis_add_si(iw->sis, si_create(merged_name, doc_count, iw->store));
1023
1052
 
1024
1053
 
1025
- Lock *commit_lock = iw->store->open_lock(iw->store, COMMIT_LOCK_NAME);
1026
- if (!commit_lock->obtain(commit_lock)) // obtain write lock
1054
+ commit_lock = iw->store->open_lock(iw->store, COMMIT_LOCK_NAME);
1055
+ if (!commit_lock->obtain(commit_lock)) { /* obtain write lock */
1027
1056
  RAISE(STATE_ERROR, COMMIT_LOCK_ERROR_MSG);
1057
+ }
1028
1058
 
1029
- sis_write(iw->sis, iw->store); // commit changes
1059
+ sis_write(iw->sis, iw->store); /* commit changes */
1030
1060
  iw_delete_segments(iw, &ir, del_cnt);
1031
1061
  if (ir) ir_close(ir);
1032
1062
 
@@ -1059,24 +1089,28 @@ Norm *norm_create(InStream *is, int field_num)
1059
1089
  return norm;
1060
1090
  }
1061
1091
 
1062
- void norm_destroy(void *p)
1092
+ void norm_destroy(Norm *norm)
1063
1093
  {
1064
- Norm *norm = (Norm *)p;
1065
1094
  is_close(norm->is);
1066
- if (norm->bytes != NULL) free(norm->bytes);
1095
+ if (norm->bytes != NULL) {
1096
+ free(norm->bytes);
1097
+ }
1067
1098
  free(norm);
1068
1099
  }
1069
1100
 
1070
1101
  void norm_rewrite(Norm *norm, Store *store, char *segment,
1071
1102
  int doc_count, Store *cfs_store)
1072
1103
  {
1073
- if (norm->bytes == NULL)
1074
- return; // These norms do not need to be rewritten
1075
-
1104
+ OutStream *os;
1076
1105
  char tmp_fname[SEGMENT_NAME_MAX_LENGTH];
1077
1106
  char norm_fname[SEGMENT_NAME_MAX_LENGTH];
1107
+
1108
+ if (norm->bytes == NULL) {
1109
+ return; /* These norms do not need to be rewritten */
1110
+ }
1111
+
1078
1112
  sprintf(tmp_fname, "%s.tmp", segment);
1079
- OutStream *os = store->create_output(store, tmp_fname);
1113
+ os = store->create_output(store, tmp_fname);
1080
1114
  TRY
1081
1115
  os_write_bytes(os, norm->bytes, doc_count);
1082
1116
  XFINALLY
@@ -1097,7 +1131,7 @@ void norm_rewrite(Norm *norm, Store *store, char *segment,
1097
1131
  *
1098
1132
  ****************************************************************************/
1099
1133
 
1100
- #define GET_SR SegmentReader *sr = (SegmentReader *)ir->data;
1134
+ #define GET_SR SegmentReader *sr = (SegmentReader *)ir->data
1101
1135
 
1102
1136
  int sr_max_doc(IndexReader *ir)
1103
1137
  {
@@ -1129,8 +1163,8 @@ void sr_close(IndexReader *ir)
1129
1163
 
1130
1164
  if (sr->freq_in) is_close(sr->freq_in);
1131
1165
  if (sr->prox_in) is_close(sr->prox_in);
1166
+
1132
1167
  fis_destroy(sr->fis);
1133
-
1134
1168
  sr_close_norms(sr);
1135
1169
 
1136
1170
  if (sr->orig_tvr) {
@@ -1139,7 +1173,7 @@ void sr_close(IndexReader *ir)
1139
1173
  ary_destroy(sr->tvr_bucket);
1140
1174
  }
1141
1175
  if (sr->deleted_docs) bv_destroy(sr->deleted_docs);
1142
- if (sr->cfs_store) sr->cfs_store->close(sr->cfs_store);
1176
+ if (sr->cfs_store) store_deref(sr->cfs_store);
1143
1177
  if (sr->fake_norms) free(sr->fake_norms);
1144
1178
  free(sr->segment);
1145
1179
  free(sr);
@@ -1175,8 +1209,8 @@ bool sr_is_deleted(IndexReader *ir, int doc_num)
1175
1209
 
1176
1210
  bool sr_has_norms(IndexReader *ir, char *field)
1177
1211
  {
1178
- bool has_norms;
1179
1212
  GET_SR;
1213
+ bool has_norms;
1180
1214
  mutex_lock(&ir->mutex);
1181
1215
  has_norms = h_has_key(sr->norms, field);
1182
1216
  mutex_unlock(&ir->mutex);
@@ -1215,13 +1249,13 @@ TermEnum *sr_terms_from(IndexReader *ir, Term *term)
1215
1249
 
1216
1250
  Document *sr_get_doc(IndexReader *ir, int doc_num)
1217
1251
  {
1252
+ GET_SR;
1218
1253
  Document *doc;
1219
1254
  mutex_lock(&ir->mutex);
1220
1255
  if (sr_is_deleted_internal(ir, doc_num)) {
1221
1256
  mutex_unlock(&ir->mutex);
1222
1257
  RAISE(STATE_ERROR, DELETED_DOC_ERROR_MSG);
1223
1258
  }
1224
- GET_SR;
1225
1259
  doc = fr_get_doc(sr->fr, doc_num);
1226
1260
  mutex_unlock(&ir->mutex);
1227
1261
  return doc;
@@ -1234,11 +1268,11 @@ sr_get_norms_into_internal(IndexReader *ir, char *field, uchar *buf, int offset)
1234
1268
  Norm *norm = h_get(sr->norms, field);
1235
1269
  if (norm == NULL) {
1236
1270
  memset(buf + offset*sizeof(uchar), 0, sr_max_doc(ir)*sizeof(uchar));
1237
- } else if (norm->bytes != NULL) { // can copy from cache
1271
+ } else if (norm->bytes != NULL) { /* can copy from cache */
1238
1272
  memcpy(buf + offset*sizeof(uchar), norm->bytes, sr_max_doc(ir)*sizeof(uchar));
1239
1273
  } else {
1240
1274
  InStream *norm_in = is_clone(norm->is);
1241
- // read from disk
1275
+ /* read from disk */
1242
1276
  is_seek(norm_in, 0);
1243
1277
  is_read_bytes(norm_in, buf, offset, sr_max_doc(ir));
1244
1278
  is_close(norm_in);
@@ -1256,13 +1290,14 @@ static inline uchar *sr_get_norms_internal(IndexReader *ir, char *field)
1256
1290
  {
1257
1291
  GET_SR;
1258
1292
  Norm *norm = h_get(sr->norms, field);
1259
- if (norm == NULL) // not an indexed field
1293
+ if (norm == NULL) { /* not an indexed field */
1260
1294
  return NULL;
1295
+ }
1261
1296
 
1262
- if (norm->bytes == NULL) { // value not yet read
1297
+ if (norm->bytes == NULL) { /* value not yet read */
1263
1298
  uchar *bytes = ALLOC_N(uchar, ir->max_doc(ir));
1264
1299
  sr_get_norms_into_internal(ir, field, bytes, 0);
1265
- norm->bytes = bytes; // cache it
1300
+ norm->bytes = bytes; /* cache it */
1266
1301
  }
1267
1302
  return norm->bytes;
1268
1303
  }
@@ -1278,8 +1313,8 @@ uchar *sr_get_norms(IndexReader *ir, char *field)
1278
1313
 
1279
1314
  static inline uchar *sr_get_norms_always(IndexReader *ir, char *field)
1280
1315
  {
1281
- uchar *bytes;
1282
1316
  GET_SR;
1317
+ uchar *bytes;
1283
1318
  mutex_lock(&ir->mutex);
1284
1319
 
1285
1320
  bytes = sr_get_norms_internal(ir, field);
@@ -1303,7 +1338,7 @@ void sr_set_norm(IndexReader *ir, int doc_num, char *field, uchar val)
1303
1338
 
1304
1339
  norm = h_get(sr->norms, field);
1305
1340
  if (norm != NULL) { /* an indexed field */
1306
- norm->is_dirty = true; // mark it dirty
1341
+ norm->is_dirty = true; /* mark it dirty */
1307
1342
  sr->norms_dirty = true;
1308
1343
 
1309
1344
  sr_get_norms_internal(ir, field)[doc_num] = val;
@@ -1318,13 +1353,15 @@ int sr_doc_freq(IndexReader *ir, Term *t)
1318
1353
  int df = ti->doc_freq;
1319
1354
  ti_destroy(ti);
1320
1355
  return df;
1321
- } else return 0;
1356
+ } else {
1357
+ return 0;
1358
+ }
1322
1359
  }
1323
1360
 
1324
1361
  Array *sr_file_names(IndexReader *ir)
1325
1362
  {
1326
1363
  GET_SR;
1327
- Array *file_names = ary_create(0, &efree);
1364
+ Array *file_names = ary_create(0, &free);
1328
1365
  FieldInfo *fi;
1329
1366
  int i;
1330
1367
  char fname[SEGMENT_NAME_MAX_LENGTH];
@@ -1352,8 +1389,8 @@ Array *sr_file_names(IndexReader *ir)
1352
1389
 
1353
1390
  HashSet *sr_get_field_names(IndexReader *ir, int field_type)
1354
1391
  {
1355
- int i;
1356
1392
  GET_SR;
1393
+ int i;
1357
1394
  HashSet *field_set = hs_str_create(NULL);
1358
1395
  FieldInfo *fi;
1359
1396
  for (i = 0; i < sr->fis->fcnt; i++) {
@@ -1396,9 +1433,10 @@ HashSet *sr_get_field_names(IndexReader *ir, int field_type)
1396
1433
  int sr_num_docs(IndexReader *ir)
1397
1434
  {
1398
1435
  GET_SR;
1399
-
1436
+ int num_docs;
1437
+
1400
1438
  mutex_lock(&ir->mutex);
1401
- int num_docs = sr_max_doc(ir);
1439
+ num_docs = sr_max_doc(ir);
1402
1440
  if (sr->deleted_docs != NULL)
1403
1441
  num_docs -= sr->deleted_docs->count;
1404
1442
  mutex_unlock(&ir->mutex);
@@ -1444,8 +1482,9 @@ TermVector *sr_get_term_vector(IndexReader *ir, int doc_num, char *field)
1444
1482
  FieldInfo *fi = (FieldInfo *)ht_get(sr->fis->by_name, field);
1445
1483
  TermVectorsReader *tvr;
1446
1484
 
1447
- if (fi == NULL || !fi->store_tv || !sr->orig_tvr || !(tvr = sr_tvr(sr)))
1485
+ if (fi == NULL || !fi->store_tv || !sr->orig_tvr || !(tvr = sr_tvr(sr))) {
1448
1486
  return NULL;
1487
+ }
1449
1488
 
1450
1489
  return tvr_get_field_tv(tvr, doc_num, field);
1451
1490
  }
@@ -1454,8 +1493,9 @@ Array *sr_get_term_vectors(IndexReader *ir, int doc_num)
1454
1493
  {
1455
1494
  GET_SR;
1456
1495
  TermVectorsReader *tvr;
1457
- if (sr->orig_tvr == NULL || (tvr = sr_tvr(sr)) == NULL)
1496
+ if (sr->orig_tvr == NULL || (tvr = sr_tvr(sr)) == NULL) {
1458
1497
  return NULL;
1498
+ }
1459
1499
 
1460
1500
  return tvr_get_tv(tvr, doc_num);
1461
1501
  }
@@ -1465,16 +1505,17 @@ void sr_commit(IndexReader *ir)
1465
1505
  GET_SR;
1466
1506
  char tmp_fname[SEGMENT_NAME_MAX_LENGTH];
1467
1507
  char del_fname[SEGMENT_NAME_MAX_LENGTH];
1508
+
1468
1509
  sprintf(del_fname, "%s.del", sr->segment);
1469
1510
 
1470
- if (sr->deleted_docs_dirty) { // re-write deleted
1511
+ if (sr->deleted_docs_dirty) { /* re-write deleted */
1471
1512
  sprintf(tmp_fname, "%s.tmp", sr->segment);
1472
1513
  bv_write(sr->deleted_docs, ir->store, tmp_fname);
1473
1514
  ir->store->rename(ir->store, tmp_fname, del_fname);
1474
1515
  }
1475
1516
  if (sr->undelete_all && ir->store->exists(ir->store, del_fname))
1476
1517
  ir->store->remove(ir->store, del_fname);
1477
- if (sr->norms_dirty) {// re-write norms
1518
+ if (sr->norms_dirty) {/* re-write norms */
1478
1519
  int i;
1479
1520
  FieldInfo *fi;
1480
1521
  for (i = 0; i < sr->fis->fcnt; i++) {
@@ -1494,6 +1535,8 @@ IndexReader *sr_open_internal(IndexReader *ir, SegmentInfo *si)
1494
1535
  {
1495
1536
  Store *store = si->store;
1496
1537
  SegmentReader *sr = ALLOC(SegmentReader);
1538
+ char fname[SEGMENT_NAME_MAX_LENGTH];
1539
+
1497
1540
  ir->get_term_vector = &sr_get_term_vector;
1498
1541
  ir->get_term_vectors = &sr_get_term_vectors;
1499
1542
  ir->num_docs = &sr_num_docs;
@@ -1518,7 +1561,6 @@ IndexReader *sr_open_internal(IndexReader *ir, SegmentInfo *si)
1518
1561
  ir->do_close = &sr_close;
1519
1562
  ir->data = sr;
1520
1563
  sr->segment = estrdup(si->name);
1521
- char fname[SEGMENT_NAME_MAX_LENGTH];
1522
1564
  sr->cfs_store = NULL;
1523
1565
  sr->fake_norms = NULL;
1524
1566
  sprintf(fname, "%s.cfs", sr->segment);
@@ -1545,13 +1587,13 @@ IndexReader *sr_open_internal(IndexReader *ir, SegmentInfo *si)
1545
1587
  sr->freq_in = store->open_input(store, fname);
1546
1588
  sprintf(fname, "%s.prx", sr->segment);
1547
1589
  sr->prox_in = store->open_input(store, fname);
1548
- sr->norms = h_new_str(NULL, &norm_destroy);
1590
+ sr->norms = h_new_str((free_ft)NULL, (free_ft)&norm_destroy);
1549
1591
  sr_open_norms(ir, store);
1550
1592
 
1551
1593
  if (fis_has_vectors(sr->fis)) {
1552
1594
  sr->orig_tvr = tvr_open(store, sr->segment, sr->fis);
1553
1595
  thread_key_create(&sr->thread_tvr, NULL);
1554
- sr->tvr_bucket = ary_create(1, (destroy_func_t)&tvr_close);
1596
+ sr->tvr_bucket = ary_create(1, (free_ft)&tvr_close);
1555
1597
  } else {
1556
1598
  sr->orig_tvr = NULL;
1557
1599
  }
@@ -1560,16 +1602,19 @@ IndexReader *sr_open_internal(IndexReader *ir, SegmentInfo *si)
1560
1602
 
1561
1603
  IndexReader *sr_open_si(SegmentInfo *si)
1562
1604
  {
1563
- IndexReader *ir = ir_create(si->store, NULL, false, false);
1605
+ IndexReader *ir = ir_create(si->store, NULL, false);
1606
+ ref(si->store);
1564
1607
  return sr_open_internal(ir, si);
1565
1608
  }
1566
1609
 
1567
- IndexReader *sr_open(SegmentInfos *sis, int si_num, int is_owner, int close_store)
1610
+ IndexReader *sr_open(SegmentInfos *sis, int si_num, bool is_owner)
1568
1611
  {
1569
1612
  SegmentInfo *si = sis->segs[si_num];
1570
- IndexReader *ir = ir_create(si->store, sis, is_owner, close_store);
1613
+ IndexReader *ir = ir_create(si->store, sis, is_owner);
1614
+ ref(si->store);
1571
1615
  return sr_open_internal(ir, si);
1572
1616
  }
1617
+
1573
1618
  /****************************************************************************
1574
1619
  *
1575
1620
  * MultiReader
@@ -1579,14 +1624,14 @@ IndexReader *sr_open(SegmentInfos *sis, int si_num, int is_owner, int close_stor
1579
1624
  #define GET_MR MultiReader *mr = (MultiReader *)ir->data
1580
1625
  #define GET_READER(doc_num) MultiReader *mr = (MultiReader *)ir->data;\
1581
1626
  int i = mr_reader_index(mr, doc_num);\
1582
- IndexReader *reader = mr->sub_readers[i];
1627
+ IndexReader *reader = mr->sub_readers[i]
1583
1628
 
1584
1629
 
1585
1630
 
1586
1631
  int mr_reader_index(MultiReader *mr, int doc_num)
1587
1632
  {
1588
- int lo = 0; // search @starts array
1589
- int hi = mr->rcnt - 1; // for first element less
1633
+ int lo = 0; /* search @starts array */
1634
+ int hi = mr->rcnt - 1; /* for first element less */
1590
1635
  int mid;
1591
1636
  int mid_value;
1592
1637
 
@@ -1597,9 +1642,9 @@ int mr_reader_index(MultiReader *mr, int doc_num)
1597
1642
  hi = mid - 1;
1598
1643
  } else if (doc_num > mid_value) {
1599
1644
  lo = mid + 1;
1600
- } else { // found a match
1645
+ } else { /* found a match */
1601
1646
  while ((mid+1 < mr->rcnt) && (mr->starts[mid+1] == mid_value))
1602
- mid += 1; // scan to last match in case we have empty segments
1647
+ mid += 1; /* scan to last match in case we have empty segments */
1603
1648
  return mid;
1604
1649
  }
1605
1650
  }
@@ -1652,10 +1697,11 @@ Document *mr_get_doc(IndexReader *ir, int doc_num)
1652
1697
  void mr_get_norms_into(IndexReader *ir, char *field, uchar *buf, int offset)
1653
1698
  {
1654
1699
  int i;
1700
+ uchar *bytes;
1655
1701
  GET_MR;
1656
1702
 
1657
1703
  mutex_lock(&ir->mutex);
1658
- uchar *bytes = h_get(mr->norms_cache, field);
1704
+ bytes = h_get(mr->norms_cache, field);
1659
1705
  if (bytes != NULL) {
1660
1706
  memcpy(buf + offset, bytes, mr->max_doc);
1661
1707
  } else {
@@ -1684,7 +1730,7 @@ uchar *mr_get_norms(IndexReader *ir, char *field)
1684
1730
  reader = mr->sub_readers[i];
1685
1731
  reader->get_norms_into(reader, field, bytes, mr->starts[i]);
1686
1732
  }
1687
- h_set(mr->norms_cache, field, bytes); // update cache
1733
+ h_set(mr->norms_cache, field, bytes); /* update cache */
1688
1734
  }
1689
1735
  mutex_unlock(&ir->mutex);
1690
1736
 
@@ -1694,7 +1740,7 @@ uchar *mr_get_norms(IndexReader *ir, char *field)
1694
1740
  void mr_set_norm(IndexReader *ir, int doc_num, char *field, uchar val)
1695
1741
  {
1696
1742
  GET_READER(doc_num);
1697
- h_del(mr->norms_cache, field); // clear cache
1743
+ h_del(mr->norms_cache, field); /* clear cache */
1698
1744
  ir_set_norm(reader, doc_num - mr->starts[i], field, val);
1699
1745
  }
1700
1746
 
@@ -1712,7 +1758,7 @@ TermEnum *mr_terms_from(IndexReader *ir, Term *term)
1712
1758
 
1713
1759
  int mr_doc_freq(IndexReader *ir, Term *t)
1714
1760
  {
1715
- int total = 0, i; // sum freqs in segments
1761
+ int total = 0, i; /* sum freqs in segments */
1716
1762
  GET_MR;
1717
1763
 
1718
1764
  IndexReader *reader;
@@ -1738,9 +1784,10 @@ TermDocEnum *mr_term_positions(IndexReader *ir)
1738
1784
  void mr_delete_doc(IndexReader *ir, int doc_num)
1739
1785
  {
1740
1786
  GET_READER(doc_num);
1741
- mr->num_docs_cache = -1; // invalidate cache
1787
+ mr->num_docs_cache = -1; /* invalidate cache */
1742
1788
 
1743
- reader->do_delete_doc(reader, doc_num - mr->starts[i]); // dispatch to segment reader
1789
+ /* dispatch to segment reader */
1790
+ reader->do_delete_doc(reader, doc_num - mr->starts[i]);
1744
1791
  mr->has_deletions = true;
1745
1792
  }
1746
1793
 
@@ -1778,8 +1825,9 @@ void mr_undelete_all(IndexReader *ir)
1778
1825
  {
1779
1826
  int i;
1780
1827
  GET_MR;
1781
- mr->num_docs_cache = -1; // invalidate cache
1782
1828
  IndexReader *reader;
1829
+
1830
+ mr->num_docs_cache = -1; /* invalidate cache */
1783
1831
  for (i = 0; i < mr->rcnt; i++) {
1784
1832
  reader = mr->sub_readers[i];
1785
1833
  reader->do_undelete_all(reader);
@@ -1829,12 +1877,12 @@ void mr_close(IndexReader *ir)
1829
1877
  IndexReader *mr_open(Store *store,
1830
1878
  SegmentInfos *sis,
1831
1879
  IndexReader **sub_readers,
1832
- int rcnt,
1833
- int close_store)
1880
+ int rcnt)
1834
1881
  {
1835
1882
  int i;
1836
1883
  MultiReader *mr = ALLOC(MultiReader);
1837
1884
  IndexReader *sub_reader;
1885
+ IndexReader *ir;
1838
1886
  mr->sub_readers = sub_readers;
1839
1887
  mr->rcnt = rcnt;
1840
1888
 
@@ -1846,15 +1894,16 @@ IndexReader *mr_open(Store *store,
1846
1894
  for (i = 0; i < rcnt; i++) {
1847
1895
  sub_reader = sub_readers[i];
1848
1896
  mr->starts[i] = mr->max_doc;
1849
- mr->max_doc += sub_reader->max_doc(sub_reader); // compute max_docs
1897
+ mr->max_doc += sub_reader->max_doc(sub_reader); /* compute max_docs */
1850
1898
 
1851
- if (sub_reader->has_deletions(sub_reader))
1899
+ if (sub_reader->has_deletions(sub_reader)) {
1852
1900
  mr->has_deletions = true;
1901
+ }
1853
1902
  }
1854
1903
  mr->starts[rcnt] = mr->max_doc;
1855
- mr->norms_cache = h_new_str(NULL, &efree);
1904
+ mr->norms_cache = h_new_str(NULL, &free);
1856
1905
 
1857
- IndexReader *ir = ir_create(store, sis, true, close_store);
1906
+ ir = ir_create(store, sis, true);
1858
1907
  ir->get_term_vector = &mr_get_term_vector;
1859
1908
  ir->get_term_vectors = &mr_get_term_vectors;
1860
1909
  ir->num_docs = &mr_num_docs;
@@ -1888,11 +1937,8 @@ IndexReader *mr_open(Store *store,
1888
1937
  *
1889
1938
  ****************************************************************************/
1890
1939
 
1891
- bool smi_lt(void *p1, void *p2)
1940
+ bool smi_lt(SegmentMergeInfo *smi1, SegmentMergeInfo *smi2)
1892
1941
  {
1893
- SegmentMergeInfo *smi1 = (SegmentMergeInfo *)p1;
1894
- SegmentMergeInfo *smi2 = (SegmentMergeInfo *)p2;
1895
-
1896
1942
  int cmpres = tb_cmp(smi1->tb, smi2->tb);
1897
1943
  if (cmpres == 0) {
1898
1944
  return smi1->base < smi2->base;
@@ -1906,8 +1952,9 @@ int *smi_load_doc_map(SegmentMergeInfo *smi)
1906
1952
  IndexReader *ir = smi->ir;
1907
1953
  if (ir->has_deletions(ir) && (smi->doc_map == NULL)) {
1908
1954
  int max_doc = ir->max_doc(ir);
1909
- smi->doc_map = ALLOC_N(int, max_doc);
1910
1955
  int j = 0, i;
1956
+
1957
+ smi->doc_map = ALLOC_N(int, max_doc);
1911
1958
  for (i = 0; i < max_doc; i++) {
1912
1959
  if (ir->is_deleted(ir, i)) {
1913
1960
  smi->doc_map[i] = -1;
@@ -1931,9 +1978,8 @@ SegmentMergeInfo *smi_create(int base, TermEnum *te, IndexReader *ir)
1931
1978
  return smi;
1932
1979
  }
1933
1980
 
1934
- void smi_destroy(void *p)
1981
+ void smi_destroy(SegmentMergeInfo *smi)
1935
1982
  {
1936
- SegmentMergeInfo *smi = (SegmentMergeInfo *)p;
1937
1983
  smi->postings->close(smi->postings);
1938
1984
  smi->te->close(smi->te);
1939
1985
  if (smi->doc_map != NULL)
@@ -1957,7 +2003,7 @@ SegmentMerger *sm_create(Store *store, char *name, int term_index_interval)
1957
2003
  SegmentMerger *sm = ALLOC(SegmentMerger);
1958
2004
  sm->store = store;
1959
2005
  sm->name = estrdup(name);
1960
- sm->readers = ary_create(config.merge_factor, &ir_destroy);
2006
+ sm->readers = ary_create(config.merge_factor, (free_ft)&ir_close);
1961
2007
  sm->fis = NULL;
1962
2008
  sm->freq_out = NULL;
1963
2009
  sm->prox_out = NULL;
@@ -1976,8 +2022,9 @@ void sm_close(SegmentMerger *sm)
1976
2022
  if (sm->freq_out != NULL) os_close(sm->freq_out);
1977
2023
  if (sm->prox_out != NULL) os_close(sm->prox_out);
1978
2024
  if (sm->tiw != NULL) {
1979
- for (i = 0; i < sm->terms_buf_size; i++)
2025
+ for (i = 0; i < sm->terms_buf_size; i++) {
1980
2026
  free(sm->terms_buf[i].text);
2027
+ }
1981
2028
  free(sm->terms_buf);
1982
2029
  tiw_close(sm->tiw);
1983
2030
  }
@@ -1988,9 +2035,8 @@ void sm_close(SegmentMerger *sm)
1988
2035
  sm->queue = NULL;
1989
2036
  }
1990
2037
 
1991
- void sm_destroy(void *p)
2038
+ void sm_destroy(SegmentMerger *sm)
1992
2039
  {
1993
- SegmentMerger *sm = (SegmentMerger *)p;
1994
2040
  if (sm->fis != NULL) fis_destroy(sm->fis);
1995
2041
  ary_destroy(sm->readers);
1996
2042
  sm_close(sm);
@@ -2028,6 +2074,8 @@ int sm_merge_fields(SegmentMerger *sm)
2028
2074
  FieldInfos *fis = sm->fis = fis_create();
2029
2075
  int doc_count = 0;
2030
2076
  Document *doc;
2077
+ FieldsWriter *fw;
2078
+
2031
2079
  for (i = 0; i < sm->readers->size; i++) {
2032
2080
  IndexReader *ir = sm->readers->elems[i];
2033
2081
 
@@ -2049,15 +2097,15 @@ int sm_merge_fields(SegmentMerger *sm)
2049
2097
  }
2050
2098
  fis_write(fis, sm->store, sm->name, ".fnm");
2051
2099
 
2052
- // merge field values
2053
- FieldsWriter *fw = fw_open(sm->store, sm->name, fis);
2100
+ /* merge field values */
2101
+ fw = fw_open(sm->store, sm->name, fis);
2054
2102
 
2055
2103
  TRY
2056
2104
  for (i = 0; i < sm->readers->size; i++) {
2057
2105
  IndexReader *ir = sm->readers->elems[i];
2058
2106
  maxdoc = ir->max_doc(ir);
2059
2107
  for (j = 0; j < maxdoc; j++) {
2060
- if (!ir->is_deleted(ir, j)) { // skip deleted docs
2108
+ if (!ir->is_deleted(ir, j)) { /* skip deleted docs */
2061
2109
  doc = ir->get_doc(ir, j);
2062
2110
  fw_add_doc(fw, doc);
2063
2111
  doc_destroy(doc);
@@ -2098,7 +2146,7 @@ int sm_append_postings(SegmentMerger *sm, SegmentMergeInfo **smis, int cnt)
2098
2146
  int i, j;
2099
2147
  int last_doc = 0, base, doc, doc_code, freq, last_position, position;
2100
2148
  int *doc_map = NULL;
2101
- int df = 0; // number of docs w/ term
2149
+ int df = 0; /* number of docs w/ term */
2102
2150
  TermDocEnum *postings;
2103
2151
  SegmentMergeInfo *smi;
2104
2152
  sm_reset_skip(sm);
@@ -2111,31 +2159,34 @@ int sm_append_postings(SegmentMerger *sm, SegmentMergeInfo **smis, int cnt)
2111
2159
  stde_seek_ti(postings, smi->te->ti_curr);
2112
2160
  while (postings->next(postings)) {
2113
2161
  doc = postings->doc_num(postings);
2114
- if (doc_map != NULL)
2115
- doc = doc_map[doc]; // work around deletions
2116
- doc += base; // convert to merged space
2162
+ if (doc_map != NULL) {
2163
+ doc = doc_map[doc]; /* work around deletions */
2164
+ }
2165
+ doc += base; /* convert to merged space */
2117
2166
 
2118
- if (doc < last_doc)
2167
+ if (doc < last_doc) {
2119
2168
  RAISE(STATE_ERROR, DOC_ORDER_ERROR_MSG);
2169
+ }
2120
2170
 
2121
2171
  df++;
2122
2172
 
2123
- if ((df % sm->skip_interval) == 0)
2173
+ if ((df % sm->skip_interval) == 0) {
2124
2174
  sm_buffer_skip(sm, last_doc);
2175
+ }
2125
2176
 
2126
- doc_code = (doc - last_doc) << 1; // use low bit to flag freq=1
2177
+ doc_code = (doc - last_doc) << 1; /* use low bit to flag freq=1 */
2127
2178
  last_doc = doc;
2128
2179
 
2129
2180
  freq = postings->freq(postings);
2130
2181
  if (freq == 1) {
2131
- os_write_vint(sm->freq_out, doc_code | 1); // write doc & freq=1
2182
+ os_write_vint(sm->freq_out, doc_code | 1); /* write doc & freq=1 */
2132
2183
  } else {
2133
- os_write_vint(sm->freq_out, doc_code); // write doc
2134
- os_write_vint(sm->freq_out, freq); // write freqency in doc
2184
+ os_write_vint(sm->freq_out, doc_code); /* write doc */
2185
+ os_write_vint(sm->freq_out, freq); /* write freqency in doc */
2135
2186
  }
2136
2187
 
2137
2188
 
2138
- last_position = 0; // write position deltas
2189
+ last_position = 0; /* write position deltas */
2139
2190
  for (j = 0; j < freq; j++) {
2140
2191
  position = postings->next_position(postings);
2141
2192
  os_write_vint(sm->prox_out, position - last_position);
@@ -2167,12 +2218,12 @@ void sm_merge_term_info(SegmentMerger *sm, SegmentMergeInfo **smis, int cnt)
2167
2218
  int freq_pointer = os_pos(sm->freq_out);
2168
2219
  int prox_pointer = os_pos(sm->prox_out);
2169
2220
 
2170
- int df = sm_append_postings(sm, smis, cnt); // append posting data
2221
+ int df = sm_append_postings(sm, smis, cnt); /* append posting data */
2171
2222
 
2172
2223
  int skip_pointer = sm_write_skip(sm);
2173
2224
 
2174
2225
  if (df > 0) {
2175
- // add an entry to the dictionary with pointers to prox and freq files
2226
+ /* add an entry to the dictionary with pointers to prox and freq files */
2176
2227
  ti_set(sm->ti, df, freq_pointer, prox_pointer, (skip_pointer - freq_pointer));
2177
2228
  tiw_add(sm->tiw, sm_tb_to_term(sm, smis[0]->tb), sm->ti);
2178
2229
  }
@@ -2184,7 +2235,7 @@ void sm_merge_term_infos(SegmentMerger *sm)
2184
2235
  int i, match_size;
2185
2236
  IndexReader *ir;
2186
2237
  TermEnum *te;
2187
- SegmentMergeInfo *smi, *top;
2238
+ SegmentMergeInfo *smi, *top, **match;
2188
2239
  TermBuffer *tb;
2189
2240
 
2190
2241
  for (i = 0; i < sm->readers->size; i++) {
@@ -2192,20 +2243,23 @@ void sm_merge_term_infos(SegmentMerger *sm)
2192
2243
  te = ir->terms(ir);
2193
2244
  smi = smi_create(base, te, ir);
2194
2245
  base += ir->num_docs(ir);
2195
- if (smi_next(smi) != NULL)
2196
- pq_push(sm->queue, smi); // initialize @queue
2197
- else
2246
+ if (smi_next(smi) != NULL) {
2247
+ pq_push(sm->queue, smi); /* initialize @queue */
2248
+ } else {
2198
2249
  smi_destroy(smi);
2250
+ }
2199
2251
  }
2200
2252
 
2201
- SegmentMergeInfo **match = ALLOC_N(SegmentMergeInfo *, sm->readers->size);
2253
+ match = ALLOC_N(SegmentMergeInfo *, sm->readers->size);
2202
2254
 
2203
2255
  while (sm->queue->count > 0) {
2204
- // for (i = 1; i <= sm->queue->count; i++) {
2205
- // printf("<{%s:%s}>", ((SegmentMergeInfo *)sm->queue->heap[i])->tb->field,
2206
- // ((SegmentMergeInfo *)sm->queue->heap[i])->tb->text);
2207
- // }printf("\n\n");
2208
- match_size = 0; // pop matching terms
2256
+ /*
2257
+ for (i = 1; i <= sm->queue->count; i++) {
2258
+ printf("<{%s:%s}>", ((SegmentMergeInfo *)sm->queue->heap[i])->tb->field,
2259
+ ((SegmentMergeInfo *)sm->queue->heap[i])->tb->text);
2260
+ }printf("\n\n");
2261
+ */
2262
+ match_size = 0; /* pop matching terms */
2209
2263
  match[match_size] = pq_pop(sm->queue);
2210
2264
  match_size++;
2211
2265
  tb = match[0]->tb;
@@ -2216,16 +2270,17 @@ void sm_merge_term_infos(SegmentMerger *sm)
2216
2270
  top = pq_top(sm->queue);
2217
2271
  }
2218
2272
 
2219
- //printf(">%s:%s<\n", match[0]->tb->field, match[0]->tb->text);
2220
- sm_merge_term_info(sm, match, match_size); // add new TermInfo
2273
+ /* printf(">%s:%s<\n", match[0]->tb->field, match[0]->tb->text); */
2274
+ sm_merge_term_info(sm, match, match_size); /* add new TermInfo */
2221
2275
 
2222
2276
  while (match_size > 0) {
2223
2277
  match_size--;
2224
2278
  smi = match[match_size];
2225
- if (smi_next(smi) != NULL)
2226
- pq_push(sm->queue, smi); // restore queue
2227
- else
2228
- smi_destroy(smi); // done with a segment
2279
+ if (smi_next(smi) != NULL) {
2280
+ pq_push(sm->queue, smi); /* restore queue */
2281
+ } else {
2282
+ smi_destroy(smi); /* done with a segment */
2283
+ }
2229
2284
  }
2230
2285
  }
2231
2286
  free(match);
@@ -2242,10 +2297,10 @@ void sm_merge_terms(SegmentMerger *sm)
2242
2297
  sprintf(fname, "%s.prx", sm->name);
2243
2298
  sm->prox_out = sm->store->create_output(sm->store, fname);
2244
2299
  sm->tiw = tiw_open(sm->store, sm->name, sm->fis, sm->term_index_interval);
2245
- // terms_buf_pointer holds a buffer of terms since the TermInfosWriter needs
2246
- // to keep the last index_interval terms so that it can compare the last term
2247
- // put in the index with the next one. So the size of the buffer must by
2248
- // index_interval + 2.
2300
+ /* terms_buf_pointer holds a buffer of terms since the TermInfosWriter needs
2301
+ * to keep the last index_interval terms so that it can compare the last term
2302
+ * put in the index with the next one. So the size of the buffer must be
2303
+ * index_interval + 2. */
2249
2304
  sm->terms_buf_pointer = 0;
2250
2305
  sm->terms_buf_size = sm->tiw->index_interval + 2;
2251
2306
  sm->terms_buf = ALLOC_N(Term, sm->terms_buf_size);
@@ -2254,7 +2309,7 @@ void sm_merge_terms(SegmentMerger *sm)
2254
2309
  sm->terms_buf[i].text = ALLOC_N(char, MAX_WORD_SIZE);
2255
2310
  }
2256
2311
  sm->skip_interval = sm->tiw->skip_interval;
2257
- sm->queue = pq_create(sm->readers->size, &smi_lt);
2312
+ sm->queue = pq_create(sm->readers->size, (lt_ft)&smi_lt);
2258
2313
 
2259
2314
  sm_merge_term_infos(sm);
2260
2315
 
@@ -2308,11 +2363,13 @@ void sm_merge_vectors(SegmentMerger *sm)
2308
2363
  ir = sm->readers->elems[i];
2309
2364
  max_doc = ir->max_doc(ir);
2310
2365
  for (j = 0; j < max_doc; j++) {
2311
- // skip deleted docs
2366
+ /* skip deleted docs */
2312
2367
  if (! ir->is_deleted(ir, j)) {
2313
2368
  tvs = ir->get_term_vectors(ir, j);
2314
- tvw_add_all_doc_vectors(tvw, tvs);
2315
- ary_destroy(tvs);
2369
+ if (tvs) {
2370
+ tvw_add_all_doc_vectors(tvw, tvs);
2371
+ ary_destroy(tvs);
2372
+ }
2316
2373
  }
2317
2374
  }
2318
2375
  }
@@ -2333,7 +2390,7 @@ int sm_merge(SegmentMerger *sm)
2333
2390
 
2334
2391
  Array *sm_create_compound_file(SegmentMerger *sm, char *file_name)
2335
2392
  {
2336
- Array *files = ary_create(0, &efree);
2393
+ Array *files = ary_create(0, &free);
2337
2394
  CompoundWriter *cw = open_cw(sm->store, file_name);
2338
2395
  FieldInfo *fi;
2339
2396
  char fname[SEGMENT_NAME_MAX_LENGTH];
@@ -2344,7 +2401,7 @@ Array *sm_create_compound_file(SegmentMerger *sm, char *file_name)
2344
2401
  ary_append(files, estrdup(fname));
2345
2402
  }
2346
2403
 
2347
- // Field norm files
2404
+ /* Field norm files */
2348
2405
  for (i = 0; i < sm->fis->fcnt; i++) {
2349
2406
  fi = sm->fis->by_number[i];
2350
2407
  if (fi->is_indexed && !fi->omit_norms) {
@@ -2353,7 +2410,7 @@ Array *sm_create_compound_file(SegmentMerger *sm, char *file_name)
2353
2410
  }
2354
2411
  }
2355
2412
 
2356
- // Vector files
2413
+ /* Vector files */
2357
2414
  if (fis_has_vectors(sm->fis)) {
2358
2415
  for (i = 0; i < NELEMS(VECTOR_EXTENSIONS); i++) {
2359
2416
  sprintf(fname, "%s.%s", sm->name, VECTOR_EXTENSIONS[i]);
@@ -2361,12 +2418,12 @@ Array *sm_create_compound_file(SegmentMerger *sm, char *file_name)
2361
2418
  }
2362
2419
  }
2363
2420
 
2364
- // Now merge all added files
2421
+ /* Now merge all added files */
2365
2422
  for (i = 0; i < files->size; i++) {
2366
2423
  cw_add_file(cw, (char *)files->elems[i]);
2367
2424
  }
2368
2425
 
2369
- // Perform the merge
2426
+ /* Perform the merge */
2370
2427
  cw_close(cw);
2371
2428
 
2372
2429
  return files;
@@ -2386,11 +2443,11 @@ void ir_acquire_write_lock(IndexReader *ir)
2386
2443
 
2387
2444
  if (ir->write_lock == NULL) {
2388
2445
  ir->write_lock = ir->store->open_lock(ir->store, WRITE_LOCK_NAME);
2389
- if (!ir->write_lock->obtain(ir->write_lock)) // obtain write lock
2446
+ if (!ir->write_lock->obtain(ir->write_lock)) /* obtain write lock */
2390
2447
  RAISE(STATE_ERROR, WRITE_LOCK_ERROR_MSG);
2391
2448
 
2392
- // we have to check whether index has changed since this reader was opened.
2393
- // if so, this reader is no longer valid for deletion
2449
+ /* we have to check whether index has changed since this reader was opened.
2450
+ * if so, this reader is no longer valid for deletion */
2394
2451
  if (sis_read_current_version(ir->store) > ir->sis->version) {
2395
2452
  ir->is_stale = true;
2396
2453
  ir->write_lock->release(ir->write_lock);
@@ -2401,7 +2458,7 @@ void ir_acquire_write_lock(IndexReader *ir)
2401
2458
  }
2402
2459
  }
2403
2460
 
2404
- IndexReader *ir_create(Store *store, SegmentInfos *sis, int is_owner, int close_store)
2461
+ IndexReader *ir_create(Store *store, SegmentInfos *sis, int is_owner)
2405
2462
  {
2406
2463
  IndexReader *ir = ALLOC(IndexReader);
2407
2464
 
@@ -2414,7 +2471,6 @@ IndexReader *ir_create(Store *store, SegmentInfos *sis, int is_owner, int close_
2414
2471
  }
2415
2472
 
2416
2473
  ir->store = store;
2417
- ir->close_store = close_store;
2418
2474
  ir->sis = sis;
2419
2475
  ir->has_changes = false;
2420
2476
  ir->is_stale = false;
@@ -2424,7 +2480,11 @@ IndexReader *ir_create(Store *store, SegmentInfos *sis, int is_owner, int close_
2424
2480
  return ir;
2425
2481
  }
2426
2482
 
2427
- IndexReader *ir_open(Store *store, int close_store)
2483
+ /**
2484
+ * Keeps its own reference to the store. If the store should be released when
2485
+ * the reader is closed, deref your own reference after passing the store in.
2486
+ */
2487
+ IndexReader *ir_open(Store *store)
2428
2488
  {
2429
2489
  int i;
2430
2490
  IndexReader *ir;
@@ -2434,13 +2494,14 @@ IndexReader *ir_open(Store *store, int close_store)
2434
2494
  sis = sis_create();
2435
2495
  sis_read(sis, store);
2436
2496
  if (sis->scnt == 1) {
2437
- ir = sr_open(sis, 0, true, close_store);
2497
+ ir = sr_open(sis, 0, true);
2438
2498
  } else {
2439
2499
  IndexReader **readers = ALLOC_N(IndexReader *, sis->scnt);
2440
2500
  for (i = 0; i < sis->scnt; i++) {
2441
- readers[i] = sr_open(sis, i, false, false);
2501
+ readers[i] = sr_open(sis, i, false);
2442
2502
  }
2443
- ir = mr_open(store, sis, readers, sis->scnt, close_store);
2503
+ ref(store);
2504
+ ir = mr_open(store, sis, readers, sis->scnt);
2444
2505
  }
2445
2506
  mutex_unlock(&store->mutex);
2446
2507
  return ir;
@@ -2481,11 +2542,13 @@ void ir_delete_doc(IndexReader *ir, int doc_num)
2481
2542
  Document *ir_get_doc_with_term(IndexReader *ir, Term *term)
2482
2543
  {
2483
2544
  TermDocEnum *tde = ir_term_docs_for(ir, term);
2545
+ Document *doc = NULL;
2546
+
2484
2547
  if (!tde) return NULL;
2485
2548
 
2486
- Document *doc = NULL;
2487
- if (tde->next(tde))
2549
+ if (tde->next(tde)) {
2488
2550
  doc = ir->get_doc(ir, tde->doc_num(tde));
2551
+ }
2489
2552
  tde->close(tde);
2490
2553
  return doc;
2491
2554
  }
@@ -2508,11 +2571,13 @@ void ir_commit_internal(IndexReader *ir)
2508
2571
  {
2509
2572
  if (ir->has_changes) {
2510
2573
  if (ir->is_owner) {
2574
+ Lock *commit_lock;
2511
2575
 
2512
2576
  mutex_lock(&ir->store->mutex);
2513
- Lock *commit_lock = ir->store->open_lock(ir->store, COMMIT_LOCK_NAME);
2514
- if (!commit_lock->obtain(commit_lock)) // obtain write lock
2577
+ commit_lock = ir->store->open_lock(ir->store, COMMIT_LOCK_NAME);
2578
+ if (!commit_lock->obtain(commit_lock)) { /* obtain commit lock */
2515
2579
  RAISE(STATE_ERROR, COMMIT_LOCK_ERROR_MSG);
2580
+ }
2516
2581
 
2517
2582
  ir->do_commit(ir);
2518
2583
  sis_write(ir->sis, ir->store);
@@ -2522,7 +2587,7 @@ void ir_commit_internal(IndexReader *ir)
2522
2587
  mutex_unlock(&ir->store->mutex);
2523
2588
 
2524
2589
  if (ir->write_lock != NULL) {
2525
- ir->write_lock->release(ir->write_lock); // release write lock
2590
+ ir->write_lock->release(ir->write_lock); /* release write lock */
2526
2591
  ir->store->close_lock(ir->write_lock);
2527
2592
  ir->write_lock = NULL;
2528
2593
  }
@@ -2545,9 +2610,7 @@ void ir_close(IndexReader *ir)
2545
2610
  mutex_lock(&ir->mutex);
2546
2611
  ir_commit_internal(ir);
2547
2612
  ir->do_close(ir);
2548
- if (ir->close_store) {
2549
- ir->store->close(ir->store);
2550
- }
2613
+ store_deref(ir->store);
2551
2614
  if (ir->is_owner) {
2552
2615
  sis_destroy(ir->sis);
2553
2616
  }
@@ -2562,12 +2625,6 @@ void ir_close(IndexReader *ir)
2562
2625
  free(ir);
2563
2626
  }
2564
2627
 
2565
- void ir_destroy(void *p)
2566
- {
2567
- IndexReader *ir = (IndexReader *)p;
2568
- ir_close(ir);
2569
- }
2570
-
2571
2628
  /**
2572
2629
  * Don't call this method if the cache already exists
2573
2630
  **/