ferret 0.9.1 → 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105)
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/index_rw.c CHANGED
@@ -1,4 +1,4 @@
1
- #include <index.h>
1
+ #include "index.h"
2
2
  #include <stdlib.h>
3
3
  #include <string.h>
4
4
  #include <array.h>
@@ -24,11 +24,11 @@ const char *VECTOR_EXTENSIONS[] = {
24
24
  };
25
25
 
26
26
  FerretConfig config = {
27
- 10, // default merge_factor
28
- 10, // default min_merge_docs
29
- INT_MAX, // default max_merge_docs
30
- 10000, // default max_field_length
31
- 128 // default term_index_interval
27
+ 10, /* default merge_factor */
28
+ 10, /* default min_merge_docs */
29
+ INT_MAX, /* default max_merge_docs */
30
+ 10000, /* default max_field_length */
31
+ 128 /* default term_index_interval */
32
32
  };
33
33
 
34
34
  /***************************************************************************
@@ -47,33 +47,32 @@ int co_eq(const void *key1, const void *key2)
47
47
  return (key1 == key2);
48
48
  }
49
49
 
50
- void co_destroy(void *p)
50
+ void co_destroy(CacheObject *self)
51
51
  {
52
- CacheObject *co = (CacheObject *)p;
53
- h_rem(co->ref_tab1, co->ref2, false);
54
- h_rem(co->ref_tab2, co->ref1, false);
55
- co->destroy(co->obj);
56
- free(co);
52
+ h_rem(self->ref_tab1, self->ref2, false);
53
+ h_rem(self->ref_tab2, self->ref1, false);
54
+ self->destroy(self->obj);
55
+ free(self);
57
56
  }
58
57
 
59
58
  CacheObject *co_create(HshTable *ref_tab1, HshTable *ref_tab2,
60
- void *ref1, void *ref2, void (*destroy)(void *p), void *obj)
59
+ void *ref1, void *ref2, free_ft destroy, void *obj)
61
60
  {
62
- CacheObject *co = ALLOC(CacheObject);
63
- h_set(ref_tab1, ref2, co);
64
- h_set(ref_tab2, ref1, co);
65
- co->ref_tab1 = ref_tab1;
66
- co->ref_tab2 = ref_tab2;
67
- co->ref1 = ref1;
68
- co->ref2 = ref2;
69
- co->destroy = destroy;
70
- co->obj = obj;
71
- return co;
61
+ CacheObject *self = ALLOC(CacheObject);
62
+ h_set(ref_tab1, ref2, self);
63
+ h_set(ref_tab2, ref1, self);
64
+ self->ref_tab1 = ref_tab1;
65
+ self->ref_tab2 = ref_tab2;
66
+ self->ref1 = ref1;
67
+ self->ref2 = ref2;
68
+ self->destroy = destroy;
69
+ self->obj = obj;
70
+ return self;
72
71
  }
73
72
 
74
73
  HshTable *co_hsh_create()
75
74
  {
76
- return h_new(&co_hash, &co_eq, NULL, &co_destroy);
75
+ return h_new(&co_hash, &co_eq, (free_ft)NULL, (free_ft)&co_destroy);
77
76
  }
78
77
 
79
78
  /***************************************************************************
@@ -84,39 +83,38 @@ HshTable *co_hsh_create()
84
83
 
85
84
  Posting *p_create(Term *term, int position, TVOffsetInfo *offset)
86
85
  {
87
- Posting *p = ALLOC(Posting);
88
- p->freq = 1;
89
- p->size = 1;
90
- p->term = term;
91
- p->positions = ALLOC(int);
92
- p->positions[0] = position;
93
- p->offsets = ALLOC(TVOffsetInfo *);
94
- p->offsets[0] = offset;
95
- return p;
86
+ Posting *self = ALLOC(Posting);
87
+ self->freq = 1;
88
+ self->size = 1;
89
+ self->term = term;
90
+ self->positions = ALLOC(int);
91
+ self->positions[0] = position;
92
+ self->offsets = ALLOC(TVOffsetInfo *);
93
+ self->offsets[0] = offset;
94
+ return self;
96
95
  }
97
96
 
98
- void p_destroy(void *p)
97
+ void p_destroy(Posting *self)
99
98
  {
100
- // the positions and offsets will be put in a TVTerm so no need to free
99
+ /* the positions and offsets will be put in a TVTerm so no need to free */
101
100
  int i;
102
- Posting *post = (Posting *)p;
103
- free(post->positions);
104
- for (i = 0; i < post->freq; i++)
105
- tvoi_destroy(post->offsets[i]);
106
- free(post->offsets);
107
- free(p);
101
+ free(self->positions);
102
+ for (i = 0; i < self->freq; i++)
103
+ tvoi_destroy(self->offsets[i]);
104
+ free(self->offsets);
105
+ free(self);
108
106
  }
109
107
 
110
- void p_add_occurance(Posting *p, int position, TVOffsetInfo *offset)
108
+ void p_add_occurance(Posting *self, int position, TVOffsetInfo *offset)
111
109
  {
112
- if (p->freq >= p->size) {
113
- p->size *= 2;
114
- REALLOC_N(p->positions, int, p->size);
115
- REALLOC_N(p->offsets, TVOffsetInfo *, p->size);
110
+ if (self->freq >= self->size) {
111
+ self->size *= 2;
112
+ REALLOC_N(self->positions, int, self->size);
113
+ REALLOC_N(self->offsets, TVOffsetInfo *, self->size);
116
114
  }
117
- p->positions[p->freq] = position;
118
- p->offsets[p->freq] = offset;
119
- p->freq++;
115
+ self->positions[self->freq] = position;
116
+ self->offsets[self->freq] = offset;
117
+ self->freq++;
120
118
  }
121
119
 
122
120
  inline int p_cmp(const void *const p1, const void *const p2)
@@ -137,47 +135,49 @@ DocumentWriter *dw_open(Store *store,
137
135
  int max_field_length,
138
136
  int term_index_interval)
139
137
  {
140
- DocumentWriter *dw = ALLOC(DocumentWriter);
141
- dw->store = store;
142
- dw->analyzer = analyzer;
143
- dw->similarity = similarity;
144
- dw->fis = NULL;
145
- dw->postingtable = h_new(&term_hash, &term_eq, &term_destroy, &p_destroy);
146
- dw->max_field_length = max_field_length;
147
- dw->term_index_interval = term_index_interval;
148
- return dw;
138
+ DocumentWriter *self = ALLOC(DocumentWriter);
139
+ self->store = store;
140
+ self->analyzer = analyzer;
141
+ self->similarity = similarity;
142
+ self->fis = NULL;
143
+ self->postingtable = h_new(&term_hash, &term_eq,
144
+ (free_ft)&term_destroy,
145
+ (free_ft)&p_destroy);
146
+ self->max_field_length = max_field_length;
147
+ self->term_index_interval = term_index_interval;
148
+ return self;
149
149
  }
150
150
 
151
- void dw_close(DocumentWriter *dw)
151
+ void dw_close(DocumentWriter *self)
152
152
  {
153
- if (dw->fis) fis_destroy(dw->fis);
154
- h_destroy(dw->postingtable);
155
- free(dw);
153
+ if (self->fis) fis_destroy(self->fis);
154
+ h_destroy(self->postingtable);
155
+ free(self);
156
156
  }
157
157
 
158
- void dw_add_position(DocumentWriter *dw, char *field, char *text,
158
+ void dw_add_position(DocumentWriter *self, char *field, char *text,
159
159
  int position, TVOffsetInfo *offset)
160
160
  {
161
161
  Term termbuf = {field, text}, *term;
162
- Posting *p = (Posting *)h_get(dw->postingtable, &termbuf);
162
+ Posting *p = (Posting *)h_get(self->postingtable, &termbuf);
163
163
 
164
- if (p) { // word seen before
165
- // double the size of posting to make room for more posts.
164
+ if (p) { /* word seen before */
166
165
  if (p->freq >= p->size) {
166
+ /* double size of posting to make room for more posts. */
167
167
  p->size <<= 1;
168
168
  REALLOC_N(p->positions, int, p->size);
169
169
  p->offsets = REALLOC_N(p->offsets, TVOffsetInfo *, p->size);
170
170
  }
171
- p->positions[p->freq] = position; // add new position
172
- p->offsets[p->freq] = offset; // add new position
173
- p->freq++; // update frequency
174
- } else { // word not seen before
171
+ p->positions[p->freq] = position; /* add new position */
172
+ p->offsets[p->freq] = offset; /* add new offset */
173
+ p->freq++; /* update frequency */
174
+ } else { /* word not seen before */
175
175
  term = term_create(field, text);
176
- h_set(dw->postingtable, term, p_create(term, position, offset));
176
+ h_set(self->postingtable, term, p_create(term, position, offset));
177
177
  }
178
178
  }
179
179
 
180
- void dw_invert_doc(DocumentWriter *dw, Document *doc)
180
+ void dw_invert_doc(DocumentWriter *self, Document *doc)
181
181
  {
182
182
  int i;
183
183
  int dfcnt = doc->dfcnt;
@@ -191,69 +191,74 @@ void dw_invert_doc(DocumentWriter *dw, Document *doc)
191
191
  for (i = 0; i < dfcnt; i++) {
192
192
  field = fields[i];
193
193
  field_name = field->name;
194
- fi = ((FieldInfo *)ht_get(dw->fis->by_name, field_name));
194
+ fi = ((FieldInfo *)ht_get(self->fis->by_name, field_name));
195
195
  field_number = fi->number;
196
196
 
197
- length = dw->field_lengths[field_number];
198
- offset = dw->field_offsets[field_number];
199
- position = dw->field_positions[field_number];
197
+ length = self->field_lengths[field_number];
198
+ offset = self->field_offsets[field_number];
199
+ position = self->field_positions[field_number];
200
200
 
201
201
  if (fi->is_indexed) {
202
- if (!field->is_tokenized) {// un-tokenized field
202
+ if (!field->is_tokenized) { /* un-tokenized field */
203
203
  text = field->data;
204
- slen = strlen(text);
204
+ slen = (int)strlen(text);
205
205
  if (fi->store_offset) {
206
- dw_add_position(dw, field_name, text, position,
206
+ dw_add_position(self, field_name, text, position,
207
207
  tvoi_create(offset, offset+slen));
208
208
  } else {
209
- dw_add_position(dw, field_name, text, position, NULL);
209
+ dw_add_position(self, field_name, text, position, NULL);
210
210
  }
211
211
  offset += slen;
212
212
  length++;
213
213
  } else {
214
214
 
215
- // Tokenize field and add to posting_table
216
- stream = a_get_ts(dw->analyzer, field_name, field->data);
215
+ /* Tokenize field and add to posting_table */
216
+ stream = a_get_ts(self->analyzer, field_name, field->data);
217
217
 
218
218
  while ((token = ts_next(stream)) != NULL) {
219
219
  position += (token->pos_inc - 1);
220
220
 
221
221
  if (fi->store_offset) {
222
- dw_add_position(dw,
222
+ dw_add_position(self,
223
223
  field_name,
224
224
  token->text,
225
225
  position,
226
226
  tvoi_create(offset + token->start, offset + token->end));
227
227
  position++;
228
228
  } else {
229
- dw_add_position(dw, field_name, token->text, position, NULL);
229
+ dw_add_position(self, field_name, token->text, position, NULL);
230
230
  position++;
231
231
  }
232
232
 
233
233
  length++;
234
- // stop if we reach the max field length
235
- if (length > dw->max_field_length)
234
+ /* stop if we reach the max field length */
235
+ if (length > self->max_field_length) {
236
236
  break;
237
+ }
237
238
  }
238
239
 
239
- if (token)
240
+ if (token) {
240
241
  offset += token->end + 1;
242
+ }
241
243
  }
242
- dw->field_lengths[field_number] = length;
243
- dw->field_offsets[field_number] = offset;
244
- dw->field_positions[field_number] = position;
245
- dw->field_boosts[field_number] *= field->boost;
244
+ self->field_lengths[field_number] = length;
245
+ self->field_offsets[field_number] = offset;
246
+ self->field_positions[field_number] = position;
247
+ self->field_boosts[field_number] *= field->boost;
246
248
  }
247
249
  }
248
250
  }
249
251
 
250
- Posting **dw_sort_posting_table(DocumentWriter *dw)
252
+ Posting **dw_sort_posting_table(DocumentWriter *self)
251
253
  {
252
- HshTable *ht = dw->postingtable;
253
- int i;
254
- dw->pcnt = i = ht->used;
255
- Posting **postings = ALLOC_N(Posting *, i);
254
+ HshTable *ht = self->postingtable;
256
255
  HshEntry *he = ht->table;
256
+ Posting **postings;
257
+ int i;
258
+
259
+ self->pcnt = i = ht->used;
260
+ postings = ALLOC_N(Posting *, i);
261
+
257
262
  while (i > 0) {
258
263
  if (he->value != NULL) {
259
264
  i--;
@@ -261,16 +266,16 @@ Posting **dw_sort_posting_table(DocumentWriter *dw)
261
266
  }
262
267
  he++;
263
268
  }
264
- qsort(postings, dw->pcnt, sizeof(Posting *), &p_cmp);
269
+ qsort(postings, self->pcnt, sizeof(Posting *), &p_cmp);
265
270
  return postings;
266
271
  }
267
272
 
268
- void dw_write_postings(DocumentWriter *dw, Posting **postings, char *segment)
273
+ void dw_write_postings(DocumentWriter *self, Posting **postings, char *segment)
269
274
  {
270
275
  OutStream * volatile freq_out = NULL, * volatile prox_out = NULL;
271
276
  TermInfosWriter * volatile tiw = NULL;
272
277
  TermVectorsWriter * volatile tvw = NULL;
273
- Store *store = dw->store;
278
+ Store *store = self->store;
274
279
  TermInfo * volatile ti = NULL;
275
280
  Posting *posting;
276
281
  int i, j, posting_freq, position, last_position;
@@ -278,31 +283,31 @@ void dw_write_postings(DocumentWriter *dw, Posting **postings, char *segment)
278
283
  strcpy(fname, segment);
279
284
 
280
285
  TRY
281
- //open files for inverse index storage
286
+ /* open files for inverse index storage */
282
287
  sprintf(fname, "%s.frq", segment);
283
288
  freq_out = store->create_output(store, fname);
284
289
  sprintf(fname, "%s.prx", segment);
285
290
  prox_out = store->create_output(store, fname);
286
- tiw = tiw_open(store, segment, dw->fis, dw->term_index_interval);
291
+ tiw = tiw_open(store, segment, self->fis, self->term_index_interval);
287
292
  ti = ti_create(0, 0, 0, 0);
288
293
 
289
- for (i = 0; i < dw->pcnt; i++) {
294
+ for (i = 0; i < self->pcnt; i++) {
290
295
  posting = postings[i];
291
296
 
292
- // add an entry to the dictionary with pointers to prox and freq_out files
297
+ /* add an entry to dictionary with pointers to prox and freq_out files */
293
298
  ti_set(ti, 1, os_pos(freq_out), os_pos(prox_out), -1);
294
299
  tiw_add(tiw, posting->term, ti);
295
300
 
296
- // add an entry to the freq_out file
301
+ /* add an entry to the freq_out file */
297
302
  posting_freq = posting->freq;
298
- if (posting_freq == 1) { // optimize freq=1
299
- os_write_vint(freq_out, 1); // set low bit of doc num.
303
+ if (posting_freq == 1) { /* optimize freq=1 */
304
+ os_write_vint(freq_out, 1); /* set low bit of doc num */
300
305
  } else {
301
- os_write_vint(freq_out, 0); // the doc number
302
- os_write_vint(freq_out, posting_freq); // frequency in doc
306
+ os_write_vint(freq_out, 0); /* the doc number */
307
+ os_write_vint(freq_out, posting_freq); /* frequency in doc */
303
308
  }
304
309
 
305
- last_position = 0; // write positions
310
+ last_position = 0; /* write positions */
306
311
 
307
312
  for (j = 0; j < posting_freq; j++) {
308
313
  position = posting->positions[j];
@@ -310,16 +315,16 @@ void dw_write_postings(DocumentWriter *dw, Posting **postings, char *segment)
310
315
  last_position = position;
311
316
  }
312
317
 
313
- // check to see if we switched to a new field
318
+ /* check to see if we switched to a new field */
314
319
  term_field = posting->term->field;
315
320
  if (curr_field != term_field) {
316
321
  FieldInfo *fi;
317
- // changing field - see if there is something to save
322
+ /* changing field - see if there is something to save */
318
323
  curr_field = term_field;
319
- fi = (FieldInfo *)ht_get(dw->fis->by_name, curr_field);
324
+ fi = (FieldInfo *)ht_get(self->fis->by_name, curr_field);
320
325
  if (fi->store_tv) {
321
326
  if (tvw == NULL) {
322
- tvw = tvw_open(store, segment, dw->fis);
327
+ tvw = tvw_open(store, segment, self->fis);
323
328
  tvw_open_doc(tvw);
324
329
  }
325
330
  tvw_open_field(tvw, curr_field);
@@ -328,7 +333,7 @@ void dw_write_postings(DocumentWriter *dw, Posting **postings, char *segment)
328
333
  tvw_close_field(tvw);
329
334
  }
330
335
  }
331
- // tvw->curr_field != NULL implies field is still open
336
+ /* tvw->curr_field != NULL implies field is still open */
332
337
  if (tvw != NULL && tvw->curr_field != NULL) {
333
338
  tvw_add_term(tvw, posting->term->text, posting_freq, posting->positions, posting->offsets);
334
339
  }
@@ -338,8 +343,8 @@ void dw_write_postings(DocumentWriter *dw, Posting **postings, char *segment)
338
343
  tvw_close_doc(tvw);
339
344
  tvw_close(tvw);
340
345
  }
341
- // make an effort to close all streams we can but remember and re-raise
342
- // the last exception encountered in this process
346
+ /* make an effort to close all streams we can but remember and re-raise
347
+ * the last exception encountered in this process */
343
348
  if (freq_out) os_close(freq_out);
344
349
  if (prox_out) os_close(prox_out);
345
350
  if (tiw) tiw_close(tiw);
@@ -347,24 +352,25 @@ void dw_write_postings(DocumentWriter *dw, Posting **postings, char *segment)
347
352
  XENDTRY
348
353
  }
349
354
 
350
- void dw_write_norms(DocumentWriter *dw, char *segment)
355
+ void dw_write_norms(DocumentWriter *self, char *segment)
351
356
  {
352
357
  int i;
353
358
  float norm;
354
359
  OutStream *norms_out;
355
360
  char fname[SEGMENT_NAME_MAX_LENGTH];
356
- FieldInfos *fis = dw->fis;
361
+ FieldInfos *fis = self->fis;
357
362
  FieldInfo *fi;
358
363
 
359
364
  for (i = 0; i < fis->fcnt; i++) {
360
365
  fi = fis->by_number[i];
361
366
 
362
367
  if (fi->is_indexed && !fi->omit_norms) {
363
- norm = dw->field_boosts[i] * sim_length_norm(dw->similarity, fi->name, dw->field_lengths[i]);
368
+ norm = self->field_boosts[i] *
369
+ sim_length_norm(self->similarity, fi->name, self->field_lengths[i]);
364
370
  sprintf(fname, "%s.f%d", segment, i);
365
- norms_out = dw->store->create_output(dw->store, fname);
371
+ norms_out = self->store->create_output(self->store, fname);
366
372
  TRY
367
- os_write_byte(norms_out, sim_encode_norm(dw->similarity, norm));
373
+ os_write_byte(norms_out, sim_encode_norm(self->similarity, norm));
368
374
  XFINALLY
369
375
  os_close(norms_out);
370
376
  XENDTRY
@@ -372,49 +378,54 @@ void dw_write_norms(DocumentWriter *dw, char *segment)
372
378
  }
373
379
  }
374
380
 
375
- void dw_add_doc(DocumentWriter *dw, char *segment, Document *doc)
381
+ void dw_add_doc(DocumentWriter *self, char *segment, Document *doc)
376
382
  {
383
+ Posting **postings;
384
+ FieldsWriter *fw;
377
385
  int i;
378
- // write field names
379
- dw->fis = fis_create();
380
- fis_add_doc(dw->fis, doc);
381
- fis_write(dw->fis, dw->store, segment, ".fnm");
382
386
 
383
- // write field values
384
- FieldsWriter *fw = fw_open(dw->store, segment, dw->fis);
387
+ /* write field names */
388
+ self->fis = fis_create();
389
+ fis_add_doc(self->fis, doc);
390
+ fis_write(self->fis, self->store, segment, ".fnm");
391
+
392
+ /* write field values */
393
+ fw = fw_open(self->store, segment, self->fis);
385
394
  TRY
386
395
  fw_add_doc(fw, doc);
387
396
  XFINALLY
388
397
  fw_close(fw);
389
398
  XENDTRY
390
399
 
391
- // invert doc into posting_table
392
- h_clear(dw->postingtable); // clear posting_table
400
+ /* invert doc into posting_table */
393
401
 
394
- dw->field_boosts = ALLOC_N(float, dw->fis->fcnt);
395
- dw->field_lengths = ALLOC_AND_ZERO_N(int, dw->fis->fcnt);
396
- dw->field_offsets = ALLOC_AND_ZERO_N(int, dw->fis->fcnt);
397
- dw->field_positions = ALLOC_AND_ZERO_N(int, dw->fis->fcnt);
402
+ h_clear(self->postingtable); /* clear posting_table */
398
403
 
399
- for (i = 0; i < dw->fis->fcnt; i++)
400
- dw->field_boosts[i] = doc->boost;
404
+ self->field_boosts = ALLOC_N(float, self->fis->fcnt);
405
+ self->field_lengths = ALLOC_AND_ZERO_N(int, self->fis->fcnt);
406
+ self->field_offsets = ALLOC_AND_ZERO_N(int, self->fis->fcnt);
407
+ self->field_positions = ALLOC_AND_ZERO_N(int, self->fis->fcnt);
401
408
 
402
- dw_invert_doc(dw, doc);
409
+ for (i = 0; i < self->fis->fcnt; i++) {
410
+ self->field_boosts[i] = doc->boost;
411
+ }
412
+
413
+ dw_invert_doc(self, doc);
403
414
 
404
- // sort posting_table into an array
405
- Posting **postings = dw_sort_posting_table(dw);
415
+ /* sort posting_table into an array */
416
+ postings = dw_sort_posting_table(self);
406
417
 
407
- // write postings
408
- dw_write_postings(dw, postings, segment);
418
+ /* write postings */
419
+ dw_write_postings(self, postings, segment);
409
420
  free(postings);
410
421
 
411
- // write norms of indexed fields
412
- dw_write_norms(dw, segment);
422
+ /* write norms of indexed fields */
423
+ dw_write_norms(self, segment);
413
424
 
414
- free(dw->field_boosts);
415
- free(dw->field_lengths);
416
- free(dw->field_offsets);
417
- free(dw->field_positions);
425
+ free(self->field_boosts);
426
+ free(self->field_lengths);
427
+ free(self->field_offsets);
428
+ free(self->field_positions);
418
429
  }
419
430
 
420
431
  /****************************************************************************
@@ -432,9 +443,8 @@ SegmentInfo *si_create(char *name, int doc_cnt, Store *store)
432
443
  return si;
433
444
  }
434
445
 
435
- void si_destroy(void *p)
446
+ void si_destroy(SegmentInfo *si)
436
447
  {
437
- SegmentInfo *si = (SegmentInfo *)p;
438
448
  free(si->name);
439
449
  free(si);
440
450
  }
@@ -501,21 +511,19 @@ SegmentInfos *sis_create()
501
511
  return sis;
502
512
  }
503
513
 
504
- void sis_destroy_not_infos(void *p)
514
+ void sis_destroy_not_infos(SegmentInfos *sis)
505
515
  {
506
- SegmentInfos *sis = (SegmentInfos *)p;
507
516
  free(sis->segs);
508
- free(p);
517
+ free(sis);
509
518
  }
510
519
 
511
- void sis_destroy(void *p)
520
+ void sis_destroy(SegmentInfos *sis)
512
521
  {
513
522
  int i;
514
- SegmentInfos *sis = (SegmentInfos *)p;
515
523
  for (i = 0; i < sis->scnt; i++)
516
524
  si_destroy(sis->segs[i]);
517
525
  free(sis->segs);
518
- free(p);
526
+ free(sis);
519
527
  }
520
528
 
521
529
  void sis_add_si(SegmentInfos *sis, SegmentInfo *si)
@@ -533,8 +541,9 @@ void sis_del_at(SegmentInfos *sis, int at)
533
541
  int i;
534
542
  si_destroy(sis->segs[at]);
535
543
  sis->scnt--;
536
- for (i = at; i < sis->scnt; i++)
544
+ for (i = at; i < sis->scnt; i++) {
537
545
  sis->segs[i] = sis->segs[i+1];
546
+ }
538
547
  }
539
548
 
540
549
  void sis_del_from_to(SegmentInfos *sis, int from, int to)
@@ -561,24 +570,25 @@ void sis_clear(SegmentInfos *sis)
561
570
  void sis_read(SegmentInfos *sis, Store *store)
562
571
  {
563
572
  int doc_cnt;
573
+ int seg_count;
574
+ int i;
564
575
  char *name;
565
576
  InStream *is = store->open_input(store, SEGMENT_FILENAME);
566
577
 
567
578
  TRY
568
579
 
569
580
  sis->format = is_read_int(is);
570
- if (sis->format < 0) { // file contains explicit format info
571
- // check that it is a format we can understand
581
+ if (sis->format < 0) { /* file contains explicit format info */
582
+ /* check that it is a format we can understand */
572
583
  if (sis->format < FORMAT)
573
- RAISE(ERROR, FORMAT_VERSION_ERROR_MSG);
574
- sis->version = is_read_long(is);
575
- sis->counter = is_read_int(is);
576
- } else { // file is in old format without explicit format info
584
+ RAISE(EXCEPTION, FORMAT_VERSION_ERROR_MSG);
585
+ sis->version = (uint)is_read_long(is);
586
+ sis->counter = (int)is_read_int(is);
587
+ } else { /* file is in old format without explicit format info */
577
588
  sis->counter = sis->format;
578
589
  }
579
590
 
580
- int seg_count = is_read_int(is);
581
- int i;
591
+ seg_count = is_read_int(is);
582
592
  for (i = 0; i < seg_count; i++) {
583
593
  name = is_read_string(is);
584
594
  doc_cnt = is_read_int(is);
@@ -586,11 +596,12 @@ void sis_read(SegmentInfos *sis, Store *store)
586
596
  }
587
597
 
588
598
  if (sis->format >= 0) {
589
- // in old format the version number may be at the end of the file
590
- if (is_pos(is) >= is_length(is))
591
- sis->version = 0; // old file format without version number
592
- else
593
- sis->version = is_read_long(is); // read version
599
+ /* in old format the version number may be at the end of the file */
600
+ if (is_pos(is) >= is_length(is)) {
601
+ sis->version = 0; /* old file format without version number */
602
+ } else {
603
+ sis->version = (int)is_read_long(is); /* read version */
604
+ }
594
605
  }
595
606
  XFINALLY
596
607
  is_close(is);
@@ -604,7 +615,7 @@ void sis_write(SegmentInfos *sis, Store *store)
604
615
  OutStream *os = store->create_output(store, TEMPORARY_SEGMENT_FILENAME);
605
616
  TRY
606
617
  os_write_int(os, FORMAT);
607
- os_write_long(os, ++(sis->version)); // every write changes the index
618
+ os_write_long(os, ++(sis->version)); /* every write changes the index */
608
619
  os_write_int(os, sis->counter);
609
620
  os_write_int(os, sis->scnt);
610
621
  for (i = 0; i < sis->scnt; i++) {
@@ -617,24 +628,27 @@ void sis_write(SegmentInfos *sis, Store *store)
617
628
  os_close(os);
618
629
  XENDTRY
619
630
 
620
- //install new segment info
631
+ /* install new segment info */
621
632
  store->rename(store, TEMPORARY_SEGMENT_FILENAME, SEGMENT_FILENAME);
622
633
  }
623
634
 
624
635
  int sis_read_current_version(Store *store)
625
636
  {
626
- if (!store->exists(store, SEGMENT_FILENAME))
627
- return 0;
628
- InStream *is = store->open_input(store, SEGMENT_FILENAME);
637
+ InStream *is;
638
+ SegmentInfos *sis;
629
639
  int format = 0;
630
640
  int version = 0;
631
641
 
642
+ if (!store->exists(store, SEGMENT_FILENAME))
643
+ return 0;
644
+ is = store->open_input(store, SEGMENT_FILENAME);
645
+
632
646
  TRY
633
647
  format = is_read_int(is);
634
648
  if (format < 0) {
635
649
  if (format < FORMAT)
636
- RAISE(ERROR, FORMAT_VERSION_ERROR_MSG);
637
- version = is_read_long(is);
650
+ RAISE(EXCEPTION, FORMAT_VERSION_ERROR_MSG);
651
+ version = (int)is_read_long(is);
638
652
  }
639
653
  XFINALLY
640
654
  is_close(is);
@@ -643,11 +657,11 @@ int sis_read_current_version(Store *store)
643
657
  if (format < 0)
644
658
  return version;
645
659
 
646
- // We cannot be sure about the format of the file.
647
- // Therefore we have to read the whole file and cannot simply
648
- // seek to the version entry.
660
+ /* We cannot be sure about the format of the file.
661
+ * Therefore we have to read the whole file and cannot simply
662
+ * seek to the version entry. */
649
663
 
650
- SegmentInfos *sis = sis_create();
664
+ sis = sis_create();
651
665
  sis_read(sis, store);
652
666
  version = sis->version;
653
667
  sis_destroy(sis);
@@ -660,8 +674,10 @@ int sis_read_current_version(Store *store)
660
674
  *
661
675
  ****************************************************************************/
662
676
 
663
- IndexWriter *iw_open(Store *store, Analyzer *analyzer,
664
- bool create, bool close_store, bool close_analyzer)
677
+ /**
678
+ * Deletes the analyzer by default but leaves the store by default
679
+ */
680
+ IndexWriter *iw_open(Store *store, Analyzer *analyzer, bool create)
665
681
  {
666
682
  IndexWriter *iw = ALLOC(IndexWriter);
667
683
  if (create)
@@ -674,15 +690,14 @@ IndexWriter *iw_open(Store *store, Analyzer *analyzer,
674
690
  iw->term_index_interval = config.term_index_interval;
675
691
  iw->use_compound_file = true;
676
692
  iw->store = store;
677
- iw->close_store = close_store;
678
- iw->close_analyzer = close_analyzer;
693
+ ref(store);
679
694
  iw->analyzer = analyzer;
680
695
  iw->sis = sis_create();
681
696
  iw->similarity = sim_create_default();
682
697
  iw->ram_store = open_ram_store();
683
698
 
684
699
  mutex_lock(&store->mutex);
685
- // keep the write_lock obtained until the IndexWriter is closed.
700
+ /* keep the write_lock obtained until the IndexWriter is closed. */
686
701
  iw->write_lock = store->open_lock(store, WRITE_LOCK_NAME);
687
702
  if (!iw->write_lock->obtain(iw->write_lock)) {
688
703
  RAISE(STATE_ERROR, WRITE_LOCK_ERROR_MSG);
@@ -695,7 +710,7 @@ IndexWriter *iw_open(Store *store, Analyzer *analyzer,
695
710
  RAISE(STATE_ERROR, COMMIT_LOCK_ERROR_MSG);
696
711
  }
697
712
  TRY
698
- // commit the index
713
+ /* commit the index */
699
714
  store->clear(store);
700
715
  sis_write(iw->sis, store);
701
716
  XFINALLY
@@ -714,8 +729,9 @@ const char base36_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
714
729
  char *new_segment_name(int counter)
715
730
  {
716
731
  char buf[SEGMENT_NAME_MAX_LENGTH];
717
- buf[SEGMENT_NAME_MAX_LENGTH - 1] = '\0';
718
732
  int i;
733
+
734
+ buf[SEGMENT_NAME_MAX_LENGTH - 1] = '\0';
719
735
  for (i = SEGMENT_NAME_MAX_LENGTH - 2; ; i--) {
720
736
  buf[i] = base36_digitmap[counter%36];
721
737
  counter /= 36;
@@ -749,8 +765,8 @@ void delete_files(Array *file_names, Store *store)
749
765
  Array *sr_file_names(IndexReader *ir);
750
766
  void iw_delete_segments(IndexWriter *iw, IndexReader **segment_readers, int del_cnt)
751
767
  {
752
- // The java version keeps a record of files that it couldn't delete. This
753
- // shouldn't be a problem on linux I hope.
768
+ /* The java version keeps a record of files that it couldn't delete. This
769
+ * shouldn't be a problem on linux I hope. */
754
770
  IndexReader *ir;
755
771
  int i;
756
772
  for (i = 0; i < del_cnt; i++) {
@@ -761,22 +777,25 @@ void iw_delete_segments(IndexWriter *iw, IndexReader **segment_readers, int del_
761
777
 
762
778
  void make_compound_file(IndexWriter *iw, char *merged_name, SegmentMerger *merger)
763
779
  {
780
+ Array *files_to_delete;
781
+ Lock *commit_lock;
764
782
  char merged_tmp[SEGMENT_NAME_MAX_LENGTH], merged_cfs[SEGMENT_NAME_MAX_LENGTH];
765
783
 
766
784
  mutex_lock(&iw->store->mutex);
767
785
  sprintf(merged_tmp, "%s.tmp", merged_name);
768
786
  sprintf(merged_cfs, "%s.cfs", merged_name);
769
787
 
770
- Array *files_to_delete = sm_create_compound_file(merger, merged_tmp);
771
- Lock *commit_lock = iw->store->open_lock(iw->store, COMMIT_LOCK_NAME);
788
+ files_to_delete = sm_create_compound_file(merger, merged_tmp);
789
+ commit_lock = iw->store->open_lock(iw->store, COMMIT_LOCK_NAME);
772
790
 
773
791
  if (!commit_lock->obtain(commit_lock)) {
774
792
  RAISE(STATE_ERROR, COMMIT_LOCK_ERROR_MSG);
775
793
  }
776
794
 
777
- // make compound file visible for SegmentReaders
795
+ /* make compound file visible for SegmentReaders */
778
796
  iw->store->rename(iw->store, merged_tmp, merged_cfs);
779
- // delete now unused files of segment
797
+
798
+ /* delete now unused files of segment */
780
799
  delete_files(files_to_delete, iw->store);
781
800
 
782
801
  commit_lock->release(commit_lock);
@@ -787,7 +806,9 @@ void make_compound_file(IndexWriter *iw, char *merged_name, SegmentMerger *merge
787
806
  void iw_merge_segments_with_max(IndexWriter *iw, int min_segment, int max_segment)
788
807
  {
789
808
  int i;
790
- IndexReader *segments_to_delete[max_segment - min_segment];
809
+ int merged_doc_count;
810
+ Lock *commit_lock;
811
+ IndexReader **segments_to_delete = ALLOC_N(IndexReader *, max_segment - min_segment);
791
812
  int del_cnt = 0;
792
813
 
793
814
  char *merged_name = new_segment_name(iw->sis->counter++);
@@ -797,31 +818,31 @@ void iw_merge_segments_with_max(IndexWriter *iw, int min_segment, int max_segmen
797
818
 
798
819
 
799
820
  for (i = min_segment; i < max_segment; i++) {
800
- reader = sr_open(iw->sis, i, false, false);
821
+ reader = sr_open(iw->sis, i, false);
801
822
  sm_add(merger, reader);
802
- if ((reader->store == iw->store) || // if we own the directory
823
+ if ((reader->store == iw->store) || /* if we own the directory */
803
824
  (reader->store == iw->ram_store)) {
804
- segments_to_delete[del_cnt++] = reader; // queue segment for deletion
825
+ segments_to_delete[del_cnt++] = reader; /* queue segment for deletion */
805
826
  }
806
827
  }
807
828
 
808
- int merged_doc_count = sm_merge(merger);
829
+ merged_doc_count = sm_merge(merger);
809
830
 
810
831
  sis_del_from_to(iw->sis, min_segment, max_segment);
811
832
 
812
833
  sis_add_si(iw->sis, si_create(merged_name, merged_doc_count, iw->store));
813
834
 
814
- // close readers before we attempt to delete now-obsolete segments
835
+ /* close readers before we attempt to delete now-obsolete segments */
815
836
 
816
837
  mutex_lock(&iw->store->mutex);
817
- Lock *commit_lock = iw->store->open_lock(iw->store, COMMIT_LOCK_NAME);
838
+ commit_lock = iw->store->open_lock(iw->store, COMMIT_LOCK_NAME);
818
839
  if (!commit_lock->obtain(commit_lock)) {
819
840
  RAISE(STATE_ERROR, COMMIT_LOCK_ERROR_MSG);
820
841
  }
821
- // commit the index
842
+ /* commit the index */
822
843
  sis_write(iw->sis, iw->store);
823
844
  iw_delete_segments(iw, segments_to_delete, del_cnt);
824
- //
845
+
825
846
  commit_lock->release(commit_lock);
826
847
  iw->store->close_lock(commit_lock);
827
848
  mutex_unlock(&iw->store->mutex);
@@ -830,6 +851,7 @@ void iw_merge_segments_with_max(IndexWriter *iw, int min_segment, int max_segmen
830
851
  make_compound_file(iw, merged_name, merger);
831
852
  }
832
853
 
854
+ free(segments_to_delete);
833
855
  sm_destroy(merger);
834
856
  }
835
857
 
@@ -845,23 +867,25 @@ void iw_maybe_merge_segments(IndexWriter *iw)
845
867
  SegmentInfo *si;
846
868
 
847
869
  while (target_merge_docs <= iw->max_merge_docs) {
848
- // find segments smaller than current target size
870
+ /* find segments smaller than current target size */
849
871
  min_segment = iw->sis->scnt - 1;
850
872
  merge_docs = 0;
851
873
  while (min_segment >= 0) {
852
874
  si = iw->sis->segs[min_segment];
853
- if (si->doc_cnt >= target_merge_docs)
875
+ if (si->doc_cnt >= target_merge_docs) {
854
876
  break;
877
+ }
855
878
  merge_docs += si->doc_cnt;
856
879
  min_segment -= 1;
857
880
  }
858
881
 
859
- if (merge_docs >= target_merge_docs) // found a merge to do
882
+ if (merge_docs >= target_merge_docs) { /* found a merge to do */
860
883
  iw_merge_segments(iw, min_segment + 1);
861
- else
884
+ } else {
862
885
  break;
886
+ }
863
887
 
864
- target_merge_docs *= iw->merge_factor; // increase target size
888
+ target_merge_docs *= iw->merge_factor; /* increase target size */
865
889
  }
866
890
  }
867
891
 
@@ -883,12 +907,14 @@ void iw_flush_ram_segments(IndexWriter *iw)
883
907
  * that wasn't the ram segment. But if it fit's in with the merge
884
908
  * factor, why not merge it. Otherwise we leave it and increment min_seg
885
909
  */
886
- if (min_segment < 0 || // add one FS segment?
887
- (doc_count + segs[min_segment]->doc_cnt) > iw->merge_factor ||
888
- (segs[iw->sis->scnt-1]->store != iw->ram_store))
910
+ if ((min_segment < 0) || /* add one FS segment? */
911
+ ((doc_count + segs[min_segment]->doc_cnt) > iw->merge_factor) ||
912
+ (segs[iw->sis->scnt - 1]->store != iw->ram_store)) {
889
913
  min_segment++;
890
- if (min_segment >= iw->sis->scnt)
914
+ }
915
+ if (min_segment >= iw->sis->scnt) {
891
916
  return;
917
+ }
892
918
  iw_merge_segments(iw, min_segment);
893
919
  }
894
920
 
@@ -937,17 +963,16 @@ void iw_close(IndexWriter *iw)
937
963
  {
938
964
  mutex_lock(&iw->mutex);
939
965
  iw_flush_ram_segments(iw);
940
- ram_close(iw->ram_store);
966
+ store_deref(iw->ram_store);
941
967
  sis_destroy(iw->sis);
942
968
 
943
969
  sim_destroy(iw->similarity);
944
- if (iw->close_analyzer) a_destroy(iw->analyzer);
970
+ a_deref(iw->analyzer);
945
971
 
946
972
  iw->write_lock->release(iw->write_lock);
947
973
  iw->store->close_lock(iw->write_lock);
948
974
 
949
- if (iw->close_store)
950
- store_close(iw->store);
975
+ store_deref(iw->store);
951
976
  mutex_destroy(&iw->mutex);
952
977
  free(iw);
953
978
  }
@@ -957,13 +982,13 @@ void iw_add_indexes(IndexWriter *iw, Store **stores, int cnt)
957
982
  int i, j, end, start;
958
983
 
959
984
  mutex_lock(&iw->mutex);
960
- iw_optimize_internal(iw); // start with zero or 1 seg
985
+ iw_optimize_internal(iw); /* start with zero or 1 seg */
961
986
 
962
987
  start = iw->sis->scnt;
963
988
 
964
989
  for (i = 0; i < cnt; i++) {
965
990
  Store *store = stores[i];
966
- SegmentInfos *sis = sis_create(); // read infos from dir
991
+ SegmentInfos *sis = sis_create(); /* read infos from dir */
967
992
  sis_read(sis, store);
968
993
 
969
994
  for (j = 0; j < sis->scnt; j++) {
@@ -973,7 +998,7 @@ void iw_add_indexes(IndexWriter *iw, Store **stores, int cnt)
973
998
  sis_destroy_not_infos(sis);
974
999
  }
975
1000
 
976
- // merge newly added segments in log(n) passes
1001
+ /* merge newly added segments in log(n) passes */
977
1002
  while (iw->sis->scnt > start + iw->merge_factor) {
978
1003
  for (i = start + 1; i < iw->sis->scnt; i++) {
979
1004
  end = MIN(iw->sis->scnt, i + iw->merge_factor);
@@ -983,7 +1008,7 @@ void iw_add_indexes(IndexWriter *iw, Store **stores, int cnt)
983
1008
  }
984
1009
  }
985
1010
 
986
- // final cleanup
1011
+ /* final cleanup */
987
1012
  iw_optimize_internal(iw);
988
1013
  mutex_unlock(&iw->mutex);
989
1014
  }
@@ -996,16 +1021,20 @@ void iw_add_readers(IndexWriter *iw, IndexReader **irs, int cnt)
996
1021
  {
997
1022
  IndexReader *ir = NULL;
998
1023
  int i, del_cnt = 0;
999
-
1024
+ int doc_count;
1025
+ char *merged_name;
1026
+ SegmentMerger *merger;
1027
+ Lock *commit_lock;
1028
+
1000
1029
  mutex_lock(&iw->mutex);
1001
- iw_optimize_internal(iw); // start with zero or 1 seg
1030
+ iw_optimize_internal(iw); /* start with zero or 1 seg */
1002
1031
 
1003
- char *merged_name = new_segment_name(iw->sis->counter++);
1032
+ merged_name = new_segment_name(iw->sis->counter++);
1004
1033
 
1005
- SegmentMerger *merger = sm_create(iw->store, merged_name, iw->term_index_interval);
1006
- merger->readers->free_elem = NULL; // don't close readers
1034
+ merger = sm_create(iw->store, merged_name, iw->term_index_interval);
1035
+ merger->readers->free_elem = NULL; /* don't close readers */
1007
1036
 
1008
- if (iw->sis->scnt == 1) {// add existing index, if any
1037
+ if (iw->sis->scnt == 1) { /* add existing index, if any */
1009
1038
  ir = sr_open_si(iw->sis->segs[0]);
1010
1039
  sm_add(merger, ir);
1011
1040
  del_cnt = 1;
@@ -1015,18 +1044,19 @@ void iw_add_readers(IndexWriter *iw, IndexReader **irs, int cnt)
1015
1044
  sm_add(merger, irs[i]);
1016
1045
  }
1017
1046
 
1018
- int doc_count = sm_merge(merger); // merge 'em
1047
+ doc_count = sm_merge(merger); /* merge 'em */
1019
1048
 
1020
- // pop old infos and add new ones.
1049
+ /* pop old infos and add new ones. */
1021
1050
  sis_clear(iw->sis);
1022
1051
  sis_add_si(iw->sis, si_create(merged_name, doc_count, iw->store));
1023
1052
 
1024
1053
 
1025
- Lock *commit_lock = iw->store->open_lock(iw->store, COMMIT_LOCK_NAME);
1026
- if (!commit_lock->obtain(commit_lock)) // obtain write lock
1054
+ commit_lock = iw->store->open_lock(iw->store, COMMIT_LOCK_NAME);
1055
+ if (!commit_lock->obtain(commit_lock)) { /* obtain write lock */
1027
1056
  RAISE(STATE_ERROR, COMMIT_LOCK_ERROR_MSG);
1057
+ }
1028
1058
 
1029
- sis_write(iw->sis, iw->store); // commit changes
1059
+ sis_write(iw->sis, iw->store); /* commit changes */
1030
1060
  iw_delete_segments(iw, &ir, del_cnt);
1031
1061
  if (ir) ir_close(ir);
1032
1062
 
@@ -1059,24 +1089,28 @@ Norm *norm_create(InStream *is, int field_num)
1059
1089
  return norm;
1060
1090
  }
1061
1091
 
1062
- void norm_destroy(void *p)
1092
+ void norm_destroy(Norm *norm)
1063
1093
  {
1064
- Norm *norm = (Norm *)p;
1065
1094
  is_close(norm->is);
1066
- if (norm->bytes != NULL) free(norm->bytes);
1095
+ if (norm->bytes != NULL) {
1096
+ free(norm->bytes);
1097
+ }
1067
1098
  free(norm);
1068
1099
  }
1069
1100
 
1070
1101
  void norm_rewrite(Norm *norm, Store *store, char *segment,
1071
1102
  int doc_count, Store *cfs_store)
1072
1103
  {
1073
- if (norm->bytes == NULL)
1074
- return; // These norms do not need to be rewritten
1075
-
1104
+ OutStream *os;
1076
1105
  char tmp_fname[SEGMENT_NAME_MAX_LENGTH];
1077
1106
  char norm_fname[SEGMENT_NAME_MAX_LENGTH];
1107
+
1108
+ if (norm->bytes == NULL) {
1109
+ return; /* These norms do not need to be rewritten */
1110
+ }
1111
+
1078
1112
  sprintf(tmp_fname, "%s.tmp", segment);
1079
- OutStream *os = store->create_output(store, tmp_fname);
1113
+ os = store->create_output(store, tmp_fname);
1080
1114
  TRY
1081
1115
  os_write_bytes(os, norm->bytes, doc_count);
1082
1116
  XFINALLY
@@ -1097,7 +1131,7 @@ void norm_rewrite(Norm *norm, Store *store, char *segment,
1097
1131
  *
1098
1132
  ****************************************************************************/
1099
1133
 
1100
- #define GET_SR SegmentReader *sr = (SegmentReader *)ir->data;
1134
+ #define GET_SR SegmentReader *sr = (SegmentReader *)ir->data
1101
1135
 
1102
1136
  int sr_max_doc(IndexReader *ir)
1103
1137
  {
@@ -1129,8 +1163,8 @@ void sr_close(IndexReader *ir)
1129
1163
 
1130
1164
  if (sr->freq_in) is_close(sr->freq_in);
1131
1165
  if (sr->prox_in) is_close(sr->prox_in);
1166
+
1132
1167
  fis_destroy(sr->fis);
1133
-
1134
1168
  sr_close_norms(sr);
1135
1169
 
1136
1170
  if (sr->orig_tvr) {
@@ -1139,7 +1173,7 @@ void sr_close(IndexReader *ir)
1139
1173
  ary_destroy(sr->tvr_bucket);
1140
1174
  }
1141
1175
  if (sr->deleted_docs) bv_destroy(sr->deleted_docs);
1142
- if (sr->cfs_store) sr->cfs_store->close(sr->cfs_store);
1176
+ if (sr->cfs_store) store_deref(sr->cfs_store);
1143
1177
  if (sr->fake_norms) free(sr->fake_norms);
1144
1178
  free(sr->segment);
1145
1179
  free(sr);
@@ -1175,8 +1209,8 @@ bool sr_is_deleted(IndexReader *ir, int doc_num)
1175
1209
 
1176
1210
  bool sr_has_norms(IndexReader *ir, char *field)
1177
1211
  {
1178
- bool has_norms;
1179
1212
  GET_SR;
1213
+ bool has_norms;
1180
1214
  mutex_lock(&ir->mutex);
1181
1215
  has_norms = h_has_key(sr->norms, field);
1182
1216
  mutex_unlock(&ir->mutex);
@@ -1215,13 +1249,13 @@ TermEnum *sr_terms_from(IndexReader *ir, Term *term)
1215
1249
 
1216
1250
  Document *sr_get_doc(IndexReader *ir, int doc_num)
1217
1251
  {
1252
+ GET_SR;
1218
1253
  Document *doc;
1219
1254
  mutex_lock(&ir->mutex);
1220
1255
  if (sr_is_deleted_internal(ir, doc_num)) {
1221
1256
  mutex_unlock(&ir->mutex);
1222
1257
  RAISE(STATE_ERROR, DELETED_DOC_ERROR_MSG);
1223
1258
  }
1224
- GET_SR;
1225
1259
  doc = fr_get_doc(sr->fr, doc_num);
1226
1260
  mutex_unlock(&ir->mutex);
1227
1261
  return doc;
@@ -1234,11 +1268,11 @@ sr_get_norms_into_internal(IndexReader *ir, char *field, uchar *buf, int offset)
1234
1268
  Norm *norm = h_get(sr->norms, field);
1235
1269
  if (norm == NULL) {
1236
1270
  memset(buf + offset*sizeof(uchar), 0, sr_max_doc(ir)*sizeof(uchar));
1237
- } else if (norm->bytes != NULL) { // can copy from cache
1271
+ } else if (norm->bytes != NULL) { /* can copy from cache */
1238
1272
  memcpy(buf + offset*sizeof(uchar), norm->bytes, sr_max_doc(ir)*sizeof(uchar));
1239
1273
  } else {
1240
1274
  InStream *norm_in = is_clone(norm->is);
1241
- // read from disk
1275
+ /* read from disk */
1242
1276
  is_seek(norm_in, 0);
1243
1277
  is_read_bytes(norm_in, buf, offset, sr_max_doc(ir));
1244
1278
  is_close(norm_in);
@@ -1256,13 +1290,14 @@ static inline uchar *sr_get_norms_internal(IndexReader *ir, char *field)
1256
1290
  {
1257
1291
  GET_SR;
1258
1292
  Norm *norm = h_get(sr->norms, field);
1259
- if (norm == NULL) // not an indexed field
1293
+ if (norm == NULL) { /* not an indexed field */
1260
1294
  return NULL;
1295
+ }
1261
1296
 
1262
- if (norm->bytes == NULL) { // value not yet read
1297
+ if (norm->bytes == NULL) { /* value not yet read */
1263
1298
  uchar *bytes = ALLOC_N(uchar, ir->max_doc(ir));
1264
1299
  sr_get_norms_into_internal(ir, field, bytes, 0);
1265
- norm->bytes = bytes; // cache it
1300
+ norm->bytes = bytes; /* cache it */
1266
1301
  }
1267
1302
  return norm->bytes;
1268
1303
  }
@@ -1278,8 +1313,8 @@ uchar *sr_get_norms(IndexReader *ir, char *field)
1278
1313
 
1279
1314
  static inline uchar *sr_get_norms_always(IndexReader *ir, char *field)
1280
1315
  {
1281
- uchar *bytes;
1282
1316
  GET_SR;
1317
+ uchar *bytes;
1283
1318
  mutex_lock(&ir->mutex);
1284
1319
 
1285
1320
  bytes = sr_get_norms_internal(ir, field);
@@ -1303,7 +1338,7 @@ void sr_set_norm(IndexReader *ir, int doc_num, char *field, uchar val)
1303
1338
 
1304
1339
  norm = h_get(sr->norms, field);
1305
1340
  if (norm != NULL) { /* an indexed field */
1306
- norm->is_dirty = true; // mark it dirty
1341
+ norm->is_dirty = true; /* mark it dirty */
1307
1342
  sr->norms_dirty = true;
1308
1343
 
1309
1344
  sr_get_norms_internal(ir, field)[doc_num] = val;
@@ -1318,13 +1353,15 @@ int sr_doc_freq(IndexReader *ir, Term *t)
1318
1353
  int df = ti->doc_freq;
1319
1354
  ti_destroy(ti);
1320
1355
  return df;
1321
- } else return 0;
1356
+ } else {
1357
+ return 0;
1358
+ }
1322
1359
  }
1323
1360
 
1324
1361
  Array *sr_file_names(IndexReader *ir)
1325
1362
  {
1326
1363
  GET_SR;
1327
- Array *file_names = ary_create(0, &efree);
1364
+ Array *file_names = ary_create(0, &free);
1328
1365
  FieldInfo *fi;
1329
1366
  int i;
1330
1367
  char fname[SEGMENT_NAME_MAX_LENGTH];
@@ -1352,8 +1389,8 @@ Array *sr_file_names(IndexReader *ir)
1352
1389
 
1353
1390
  HashSet *sr_get_field_names(IndexReader *ir, int field_type)
1354
1391
  {
1355
- int i;
1356
1392
  GET_SR;
1393
+ int i;
1357
1394
  HashSet *field_set = hs_str_create(NULL);
1358
1395
  FieldInfo *fi;
1359
1396
  for (i = 0; i < sr->fis->fcnt; i++) {
@@ -1396,9 +1433,10 @@ HashSet *sr_get_field_names(IndexReader *ir, int field_type)
1396
1433
  int sr_num_docs(IndexReader *ir)
1397
1434
  {
1398
1435
  GET_SR;
1399
-
1436
+ int num_docs;
1437
+
1400
1438
  mutex_lock(&ir->mutex);
1401
- int num_docs = sr_max_doc(ir);
1439
+ num_docs = sr_max_doc(ir);
1402
1440
  if (sr->deleted_docs != NULL)
1403
1441
  num_docs -= sr->deleted_docs->count;
1404
1442
  mutex_unlock(&ir->mutex);
@@ -1444,8 +1482,9 @@ TermVector *sr_get_term_vector(IndexReader *ir, int doc_num, char *field)
1444
1482
  FieldInfo *fi = (FieldInfo *)ht_get(sr->fis->by_name, field);
1445
1483
  TermVectorsReader *tvr;
1446
1484
 
1447
- if (fi == NULL || !fi->store_tv || !sr->orig_tvr || !(tvr = sr_tvr(sr)))
1485
+ if (fi == NULL || !fi->store_tv || !sr->orig_tvr || !(tvr = sr_tvr(sr))) {
1448
1486
  return NULL;
1487
+ }
1449
1488
 
1450
1489
  return tvr_get_field_tv(tvr, doc_num, field);
1451
1490
  }
@@ -1454,8 +1493,9 @@ Array *sr_get_term_vectors(IndexReader *ir, int doc_num)
1454
1493
  {
1455
1494
  GET_SR;
1456
1495
  TermVectorsReader *tvr;
1457
- if (sr->orig_tvr == NULL || (tvr = sr_tvr(sr)) == NULL)
1496
+ if (sr->orig_tvr == NULL || (tvr = sr_tvr(sr)) == NULL) {
1458
1497
  return NULL;
1498
+ }
1459
1499
 
1460
1500
  return tvr_get_tv(tvr, doc_num);
1461
1501
  }
@@ -1465,16 +1505,17 @@ void sr_commit(IndexReader *ir)
1465
1505
  GET_SR;
1466
1506
  char tmp_fname[SEGMENT_NAME_MAX_LENGTH];
1467
1507
  char del_fname[SEGMENT_NAME_MAX_LENGTH];
1508
+
1468
1509
  sprintf(del_fname, "%s.del", sr->segment);
1469
1510
 
1470
- if (sr->deleted_docs_dirty) { // re-write deleted
1511
+ if (sr->deleted_docs_dirty) { /* re-write deleted */
1471
1512
  sprintf(tmp_fname, "%s.tmp", sr->segment);
1472
1513
  bv_write(sr->deleted_docs, ir->store, tmp_fname);
1473
1514
  ir->store->rename(ir->store, tmp_fname, del_fname);
1474
1515
  }
1475
1516
  if (sr->undelete_all && ir->store->exists(ir->store, del_fname))
1476
1517
  ir->store->remove(ir->store, del_fname);
1477
- if (sr->norms_dirty) {// re-write norms
1518
+ if (sr->norms_dirty) {/* re-write norms */
1478
1519
  int i;
1479
1520
  FieldInfo *fi;
1480
1521
  for (i = 0; i < sr->fis->fcnt; i++) {
@@ -1494,6 +1535,8 @@ IndexReader *sr_open_internal(IndexReader *ir, SegmentInfo *si)
1494
1535
  {
1495
1536
  Store *store = si->store;
1496
1537
  SegmentReader *sr = ALLOC(SegmentReader);
1538
+ char fname[SEGMENT_NAME_MAX_LENGTH];
1539
+
1497
1540
  ir->get_term_vector = &sr_get_term_vector;
1498
1541
  ir->get_term_vectors = &sr_get_term_vectors;
1499
1542
  ir->num_docs = &sr_num_docs;
@@ -1518,7 +1561,6 @@ IndexReader *sr_open_internal(IndexReader *ir, SegmentInfo *si)
1518
1561
  ir->do_close = &sr_close;
1519
1562
  ir->data = sr;
1520
1563
  sr->segment = estrdup(si->name);
1521
- char fname[SEGMENT_NAME_MAX_LENGTH];
1522
1564
  sr->cfs_store = NULL;
1523
1565
  sr->fake_norms = NULL;
1524
1566
  sprintf(fname, "%s.cfs", sr->segment);
@@ -1545,13 +1587,13 @@ IndexReader *sr_open_internal(IndexReader *ir, SegmentInfo *si)
1545
1587
  sr->freq_in = store->open_input(store, fname);
1546
1588
  sprintf(fname, "%s.prx", sr->segment);
1547
1589
  sr->prox_in = store->open_input(store, fname);
1548
- sr->norms = h_new_str(NULL, &norm_destroy);
1590
+ sr->norms = h_new_str((free_ft)NULL, (free_ft)&norm_destroy);
1549
1591
  sr_open_norms(ir, store);
1550
1592
 
1551
1593
  if (fis_has_vectors(sr->fis)) {
1552
1594
  sr->orig_tvr = tvr_open(store, sr->segment, sr->fis);
1553
1595
  thread_key_create(&sr->thread_tvr, NULL);
1554
- sr->tvr_bucket = ary_create(1, (destroy_func_t)&tvr_close);
1596
+ sr->tvr_bucket = ary_create(1, (free_ft)&tvr_close);
1555
1597
  } else {
1556
1598
  sr->orig_tvr = NULL;
1557
1599
  }
@@ -1560,16 +1602,19 @@ IndexReader *sr_open_internal(IndexReader *ir, SegmentInfo *si)
1560
1602
 
1561
1603
  IndexReader *sr_open_si(SegmentInfo *si)
1562
1604
  {
1563
- IndexReader *ir = ir_create(si->store, NULL, false, false);
1605
+ IndexReader *ir = ir_create(si->store, NULL, false);
1606
+ ref(si->store);
1564
1607
  return sr_open_internal(ir, si);
1565
1608
  }
1566
1609
 
1567
- IndexReader *sr_open(SegmentInfos *sis, int si_num, int is_owner, int close_store)
1610
+ IndexReader *sr_open(SegmentInfos *sis, int si_num, bool is_owner)
1568
1611
  {
1569
1612
  SegmentInfo *si = sis->segs[si_num];
1570
- IndexReader *ir = ir_create(si->store, sis, is_owner, close_store);
1613
+ IndexReader *ir = ir_create(si->store, sis, is_owner);
1614
+ ref(si->store);
1571
1615
  return sr_open_internal(ir, si);
1572
1616
  }
1617
+
1573
1618
  /****************************************************************************
1574
1619
  *
1575
1620
  * MultiReader
@@ -1579,14 +1624,14 @@ IndexReader *sr_open(SegmentInfos *sis, int si_num, int is_owner, int close_stor
1579
1624
  #define GET_MR MultiReader *mr = (MultiReader *)ir->data
1580
1625
  #define GET_READER(doc_num) MultiReader *mr = (MultiReader *)ir->data;\
1581
1626
  int i = mr_reader_index(mr, doc_num);\
1582
- IndexReader *reader = mr->sub_readers[i];
1627
+ IndexReader *reader = mr->sub_readers[i]
1583
1628
 
1584
1629
 
1585
1630
 
1586
1631
  int mr_reader_index(MultiReader *mr, int doc_num)
1587
1632
  {
1588
- int lo = 0; // search @starts array
1589
- int hi = mr->rcnt - 1; // for first element less
1633
+ int lo = 0; /* search @starts array */
1634
+ int hi = mr->rcnt - 1; /* for first element less */
1590
1635
  int mid;
1591
1636
  int mid_value;
1592
1637
 
@@ -1597,9 +1642,9 @@ int mr_reader_index(MultiReader *mr, int doc_num)
1597
1642
  hi = mid - 1;
1598
1643
  } else if (doc_num > mid_value) {
1599
1644
  lo = mid + 1;
1600
- } else { // found a match
1645
+ } else { /* found a match */
1601
1646
  while ((mid+1 < mr->rcnt) && (mr->starts[mid+1] == mid_value))
1602
- mid += 1; // scan to last match in case we have empty segments
1647
+ mid += 1; /* scan to last match in case we have empty segments */
1603
1648
  return mid;
1604
1649
  }
1605
1650
  }
@@ -1652,10 +1697,11 @@ Document *mr_get_doc(IndexReader *ir, int doc_num)
1652
1697
  void mr_get_norms_into(IndexReader *ir, char *field, uchar *buf, int offset)
1653
1698
  {
1654
1699
  int i;
1700
+ uchar *bytes;
1655
1701
  GET_MR;
1656
1702
 
1657
1703
  mutex_lock(&ir->mutex);
1658
- uchar *bytes = h_get(mr->norms_cache, field);
1704
+ bytes = h_get(mr->norms_cache, field);
1659
1705
  if (bytes != NULL) {
1660
1706
  memcpy(buf + offset, bytes, mr->max_doc);
1661
1707
  } else {
@@ -1684,7 +1730,7 @@ uchar *mr_get_norms(IndexReader *ir, char *field)
1684
1730
  reader = mr->sub_readers[i];
1685
1731
  reader->get_norms_into(reader, field, bytes, mr->starts[i]);
1686
1732
  }
1687
- h_set(mr->norms_cache, field, bytes); // update cache
1733
+ h_set(mr->norms_cache, field, bytes); /* update cache */
1688
1734
  }
1689
1735
  mutex_unlock(&ir->mutex);
1690
1736
 
@@ -1694,7 +1740,7 @@ uchar *mr_get_norms(IndexReader *ir, char *field)
1694
1740
  void mr_set_norm(IndexReader *ir, int doc_num, char *field, uchar val)
1695
1741
  {
1696
1742
  GET_READER(doc_num);
1697
- h_del(mr->norms_cache, field); // clear cache
1743
+ h_del(mr->norms_cache, field); /* clear cache */
1698
1744
  ir_set_norm(reader, doc_num - mr->starts[i], field, val);
1699
1745
  }
1700
1746
 
@@ -1712,7 +1758,7 @@ TermEnum *mr_terms_from(IndexReader *ir, Term *term)
1712
1758
 
1713
1759
  int mr_doc_freq(IndexReader *ir, Term *t)
1714
1760
  {
1715
- int total = 0, i; // sum freqs in segments
1761
+ int total = 0, i; /* sum freqs in segments */
1716
1762
  GET_MR;
1717
1763
 
1718
1764
  IndexReader *reader;
@@ -1738,9 +1784,10 @@ TermDocEnum *mr_term_positions(IndexReader *ir)
1738
1784
  void mr_delete_doc(IndexReader *ir, int doc_num)
1739
1785
  {
1740
1786
  GET_READER(doc_num);
1741
- mr->num_docs_cache = -1; // invalidate cache
1787
+ mr->num_docs_cache = -1; /* invalidate cache */
1742
1788
 
1743
- reader->do_delete_doc(reader, doc_num - mr->starts[i]); // dispatch to segment reader
1789
+ /* dispatch to segment reader */
1790
+ reader->do_delete_doc(reader, doc_num - mr->starts[i]);
1744
1791
  mr->has_deletions = true;
1745
1792
  }
1746
1793
 
@@ -1778,8 +1825,9 @@ void mr_undelete_all(IndexReader *ir)
1778
1825
  {
1779
1826
  int i;
1780
1827
  GET_MR;
1781
- mr->num_docs_cache = -1; // invalidate cache
1782
1828
  IndexReader *reader;
1829
+
1830
+ mr->num_docs_cache = -1; /* invalidate cache */
1783
1831
  for (i = 0; i < mr->rcnt; i++) {
1784
1832
  reader = mr->sub_readers[i];
1785
1833
  reader->do_undelete_all(reader);
@@ -1829,12 +1877,12 @@ void mr_close(IndexReader *ir)
1829
1877
  IndexReader *mr_open(Store *store,
1830
1878
  SegmentInfos *sis,
1831
1879
  IndexReader **sub_readers,
1832
- int rcnt,
1833
- int close_store)
1880
+ int rcnt)
1834
1881
  {
1835
1882
  int i;
1836
1883
  MultiReader *mr = ALLOC(MultiReader);
1837
1884
  IndexReader *sub_reader;
1885
+ IndexReader *ir;
1838
1886
  mr->sub_readers = sub_readers;
1839
1887
  mr->rcnt = rcnt;
1840
1888
 
@@ -1846,15 +1894,16 @@ IndexReader *mr_open(Store *store,
1846
1894
  for (i = 0; i < rcnt; i++) {
1847
1895
  sub_reader = sub_readers[i];
1848
1896
  mr->starts[i] = mr->max_doc;
1849
- mr->max_doc += sub_reader->max_doc(sub_reader); // compute max_docs
1897
+ mr->max_doc += sub_reader->max_doc(sub_reader); /* compute max_docs */
1850
1898
 
1851
- if (sub_reader->has_deletions(sub_reader))
1899
+ if (sub_reader->has_deletions(sub_reader)) {
1852
1900
  mr->has_deletions = true;
1901
+ }
1853
1902
  }
1854
1903
  mr->starts[rcnt] = mr->max_doc;
1855
- mr->norms_cache = h_new_str(NULL, &efree);
1904
+ mr->norms_cache = h_new_str(NULL, &free);
1856
1905
 
1857
- IndexReader *ir = ir_create(store, sis, true, close_store);
1906
+ ir = ir_create(store, sis, true);
1858
1907
  ir->get_term_vector = &mr_get_term_vector;
1859
1908
  ir->get_term_vectors = &mr_get_term_vectors;
1860
1909
  ir->num_docs = &mr_num_docs;
@@ -1888,11 +1937,8 @@ IndexReader *mr_open(Store *store,
1888
1937
  *
1889
1938
  ****************************************************************************/
1890
1939
 
1891
- bool smi_lt(void *p1, void *p2)
1940
+ bool smi_lt(SegmentMergeInfo *smi1, SegmentMergeInfo *smi2)
1892
1941
  {
1893
- SegmentMergeInfo *smi1 = (SegmentMergeInfo *)p1;
1894
- SegmentMergeInfo *smi2 = (SegmentMergeInfo *)p2;
1895
-
1896
1942
  int cmpres = tb_cmp(smi1->tb, smi2->tb);
1897
1943
  if (cmpres == 0) {
1898
1944
  return smi1->base < smi2->base;
@@ -1906,8 +1952,9 @@ int *smi_load_doc_map(SegmentMergeInfo *smi)
1906
1952
  IndexReader *ir = smi->ir;
1907
1953
  if (ir->has_deletions(ir) && (smi->doc_map == NULL)) {
1908
1954
  int max_doc = ir->max_doc(ir);
1909
- smi->doc_map = ALLOC_N(int, max_doc);
1910
1955
  int j = 0, i;
1956
+
1957
+ smi->doc_map = ALLOC_N(int, max_doc);
1911
1958
  for (i = 0; i < max_doc; i++) {
1912
1959
  if (ir->is_deleted(ir, i)) {
1913
1960
  smi->doc_map[i] = -1;
@@ -1931,9 +1978,8 @@ SegmentMergeInfo *smi_create(int base, TermEnum *te, IndexReader *ir)
1931
1978
  return smi;
1932
1979
  }
1933
1980
 
1934
- void smi_destroy(void *p)
1981
+ void smi_destroy(SegmentMergeInfo *smi)
1935
1982
  {
1936
- SegmentMergeInfo *smi = (SegmentMergeInfo *)p;
1937
1983
  smi->postings->close(smi->postings);
1938
1984
  smi->te->close(smi->te);
1939
1985
  if (smi->doc_map != NULL)
@@ -1957,7 +2003,7 @@ SegmentMerger *sm_create(Store *store, char *name, int term_index_interval)
1957
2003
  SegmentMerger *sm = ALLOC(SegmentMerger);
1958
2004
  sm->store = store;
1959
2005
  sm->name = estrdup(name);
1960
- sm->readers = ary_create(config.merge_factor, &ir_destroy);
2006
+ sm->readers = ary_create(config.merge_factor, (free_ft)&ir_close);
1961
2007
  sm->fis = NULL;
1962
2008
  sm->freq_out = NULL;
1963
2009
  sm->prox_out = NULL;
@@ -1976,8 +2022,9 @@ void sm_close(SegmentMerger *sm)
1976
2022
  if (sm->freq_out != NULL) os_close(sm->freq_out);
1977
2023
  if (sm->prox_out != NULL) os_close(sm->prox_out);
1978
2024
  if (sm->tiw != NULL) {
1979
- for (i = 0; i < sm->terms_buf_size; i++)
2025
+ for (i = 0; i < sm->terms_buf_size; i++) {
1980
2026
  free(sm->terms_buf[i].text);
2027
+ }
1981
2028
  free(sm->terms_buf);
1982
2029
  tiw_close(sm->tiw);
1983
2030
  }
@@ -1988,9 +2035,8 @@ void sm_close(SegmentMerger *sm)
1988
2035
  sm->queue = NULL;
1989
2036
  }
1990
2037
 
1991
- void sm_destroy(void *p)
2038
+ void sm_destroy(SegmentMerger *sm)
1992
2039
  {
1993
- SegmentMerger *sm = (SegmentMerger *)p;
1994
2040
  if (sm->fis != NULL) fis_destroy(sm->fis);
1995
2041
  ary_destroy(sm->readers);
1996
2042
  sm_close(sm);
@@ -2028,6 +2074,8 @@ int sm_merge_fields(SegmentMerger *sm)
2028
2074
  FieldInfos *fis = sm->fis = fis_create();
2029
2075
  int doc_count = 0;
2030
2076
  Document *doc;
2077
+ FieldsWriter *fw;
2078
+
2031
2079
  for (i = 0; i < sm->readers->size; i++) {
2032
2080
  IndexReader *ir = sm->readers->elems[i];
2033
2081
 
@@ -2049,15 +2097,15 @@ int sm_merge_fields(SegmentMerger *sm)
2049
2097
  }
2050
2098
  fis_write(fis, sm->store, sm->name, ".fnm");
2051
2099
 
2052
- // merge field values
2053
- FieldsWriter *fw = fw_open(sm->store, sm->name, fis);
2100
+ /* merge field values */
2101
+ fw = fw_open(sm->store, sm->name, fis);
2054
2102
 
2055
2103
  TRY
2056
2104
  for (i = 0; i < sm->readers->size; i++) {
2057
2105
  IndexReader *ir = sm->readers->elems[i];
2058
2106
  maxdoc = ir->max_doc(ir);
2059
2107
  for (j = 0; j < maxdoc; j++) {
2060
- if (!ir->is_deleted(ir, j)) { // skip deleted docs
2108
+ if (!ir->is_deleted(ir, j)) { /* skip deleted docs */
2061
2109
  doc = ir->get_doc(ir, j);
2062
2110
  fw_add_doc(fw, doc);
2063
2111
  doc_destroy(doc);
@@ -2098,7 +2146,7 @@ int sm_append_postings(SegmentMerger *sm, SegmentMergeInfo **smis, int cnt)
2098
2146
  int i, j;
2099
2147
  int last_doc = 0, base, doc, doc_code, freq, last_position, position;
2100
2148
  int *doc_map = NULL;
2101
- int df = 0; // number of docs w/ term
2149
+ int df = 0; /* number of docs w/ term */
2102
2150
  TermDocEnum *postings;
2103
2151
  SegmentMergeInfo *smi;
2104
2152
  sm_reset_skip(sm);
@@ -2111,31 +2159,34 @@ int sm_append_postings(SegmentMerger *sm, SegmentMergeInfo **smis, int cnt)
2111
2159
  stde_seek_ti(postings, smi->te->ti_curr);
2112
2160
  while (postings->next(postings)) {
2113
2161
  doc = postings->doc_num(postings);
2114
- if (doc_map != NULL)
2115
- doc = doc_map[doc]; // work around deletions
2116
- doc += base; // convert to merged space
2162
+ if (doc_map != NULL) {
2163
+ doc = doc_map[doc]; /* work around deletions */
2164
+ }
2165
+ doc += base; /* convert to merged space */
2117
2166
 
2118
- if (doc < last_doc)
2167
+ if (doc < last_doc) {
2119
2168
  RAISE(STATE_ERROR, DOC_ORDER_ERROR_MSG);
2169
+ }
2120
2170
 
2121
2171
  df++;
2122
2172
 
2123
- if ((df % sm->skip_interval) == 0)
2173
+ if ((df % sm->skip_interval) == 0) {
2124
2174
  sm_buffer_skip(sm, last_doc);
2175
+ }
2125
2176
 
2126
- doc_code = (doc - last_doc) << 1; // use low bit to flag freq=1
2177
+ doc_code = (doc - last_doc) << 1; /* use low bit to flag freq=1 */
2127
2178
  last_doc = doc;
2128
2179
 
2129
2180
  freq = postings->freq(postings);
2130
2181
  if (freq == 1) {
2131
- os_write_vint(sm->freq_out, doc_code | 1); // write doc & freq=1
2182
+ os_write_vint(sm->freq_out, doc_code | 1); /* write doc & freq=1 */
2132
2183
  } else {
2133
- os_write_vint(sm->freq_out, doc_code); // write doc
2134
- os_write_vint(sm->freq_out, freq); // write freqency in doc
2184
+ os_write_vint(sm->freq_out, doc_code); /* write doc */
2185
+ os_write_vint(sm->freq_out, freq); /* write freqency in doc */
2135
2186
  }
2136
2187
 
2137
2188
 
2138
- last_position = 0; // write position deltas
2189
+ last_position = 0; /* write position deltas */
2139
2190
  for (j = 0; j < freq; j++) {
2140
2191
  position = postings->next_position(postings);
2141
2192
  os_write_vint(sm->prox_out, position - last_position);
@@ -2167,12 +2218,12 @@ void sm_merge_term_info(SegmentMerger *sm, SegmentMergeInfo **smis, int cnt)
2167
2218
  int freq_pointer = os_pos(sm->freq_out);
2168
2219
  int prox_pointer = os_pos(sm->prox_out);
2169
2220
 
2170
- int df = sm_append_postings(sm, smis, cnt); // append posting data
2221
+ int df = sm_append_postings(sm, smis, cnt); /* append posting data */
2171
2222
 
2172
2223
  int skip_pointer = sm_write_skip(sm);
2173
2224
 
2174
2225
  if (df > 0) {
2175
- // add an entry to the dictionary with pointers to prox and freq files
2226
+ /* add an entry to the dictionary with pointers to prox and freq files */
2176
2227
  ti_set(sm->ti, df, freq_pointer, prox_pointer, (skip_pointer - freq_pointer));
2177
2228
  tiw_add(sm->tiw, sm_tb_to_term(sm, smis[0]->tb), sm->ti);
2178
2229
  }
@@ -2184,7 +2235,7 @@ void sm_merge_term_infos(SegmentMerger *sm)
2184
2235
  int i, match_size;
2185
2236
  IndexReader *ir;
2186
2237
  TermEnum *te;
2187
- SegmentMergeInfo *smi, *top;
2238
+ SegmentMergeInfo *smi, *top, **match;
2188
2239
  TermBuffer *tb;
2189
2240
 
2190
2241
  for (i = 0; i < sm->readers->size; i++) {
@@ -2192,20 +2243,23 @@ void sm_merge_term_infos(SegmentMerger *sm)
2192
2243
  te = ir->terms(ir);
2193
2244
  smi = smi_create(base, te, ir);
2194
2245
  base += ir->num_docs(ir);
2195
- if (smi_next(smi) != NULL)
2196
- pq_push(sm->queue, smi); // initialize @queue
2197
- else
2246
+ if (smi_next(smi) != NULL) {
2247
+ pq_push(sm->queue, smi); /* initialize @queue */
2248
+ } else {
2198
2249
  smi_destroy(smi);
2250
+ }
2199
2251
  }
2200
2252
 
2201
- SegmentMergeInfo **match = ALLOC_N(SegmentMergeInfo *, sm->readers->size);
2253
+ match = ALLOC_N(SegmentMergeInfo *, sm->readers->size);
2202
2254
 
2203
2255
  while (sm->queue->count > 0) {
2204
- // for (i = 1; i <= sm->queue->count; i++) {
2205
- // printf("<{%s:%s}>", ((SegmentMergeInfo *)sm->queue->heap[i])->tb->field,
2206
- // ((SegmentMergeInfo *)sm->queue->heap[i])->tb->text);
2207
- // }printf("\n\n");
2208
- match_size = 0; // pop matching terms
2256
+ /*
2257
+ for (i = 1; i <= sm->queue->count; i++) {
2258
+ printf("<{%s:%s}>", ((SegmentMergeInfo *)sm->queue->heap[i])->tb->field,
2259
+ ((SegmentMergeInfo *)sm->queue->heap[i])->tb->text);
2260
+ }printf("\n\n");
2261
+ */
2262
+ match_size = 0; /* pop matching terms */
2209
2263
  match[match_size] = pq_pop(sm->queue);
2210
2264
  match_size++;
2211
2265
  tb = match[0]->tb;
@@ -2216,16 +2270,17 @@ void sm_merge_term_infos(SegmentMerger *sm)
2216
2270
  top = pq_top(sm->queue);
2217
2271
  }
2218
2272
 
2219
- //printf(">%s:%s<\n", match[0]->tb->field, match[0]->tb->text);
2220
- sm_merge_term_info(sm, match, match_size); // add new TermInfo
2273
+ /* printf(">%s:%s<\n", match[0]->tb->field, match[0]->tb->text); */
2274
+ sm_merge_term_info(sm, match, match_size); /* add new TermInfo */
2221
2275
 
2222
2276
  while (match_size > 0) {
2223
2277
  match_size--;
2224
2278
  smi = match[match_size];
2225
- if (smi_next(smi) != NULL)
2226
- pq_push(sm->queue, smi); // restore queue
2227
- else
2228
- smi_destroy(smi); // done with a segment
2279
+ if (smi_next(smi) != NULL) {
2280
+ pq_push(sm->queue, smi); /* restore queue */
2281
+ } else {
2282
+ smi_destroy(smi); /* done with a segment */
2283
+ }
2229
2284
  }
2230
2285
  }
2231
2286
  free(match);
@@ -2242,10 +2297,10 @@ void sm_merge_terms(SegmentMerger *sm)
2242
2297
  sprintf(fname, "%s.prx", sm->name);
2243
2298
  sm->prox_out = sm->store->create_output(sm->store, fname);
2244
2299
  sm->tiw = tiw_open(sm->store, sm->name, sm->fis, sm->term_index_interval);
2245
- // terms_buf_pointer holds a buffer of terms since the TermInfosWriter needs
2246
- // to keep the last index_interval terms so that it can compare the last term
2247
- // put in the index with the next one. So the size of the buffer must by
2248
- // index_interval + 2.
2300
+ /* terms_buf_pointer holds a buffer of terms since the TermInfosWriter needs
2301
+ * to keep the last index_interval terms so that it can compare the last term
2302
+ * put in the index with the next one. So the size of the buffer must be
2303
+ * index_interval + 2. */
2249
2304
  sm->terms_buf_pointer = 0;
2250
2305
  sm->terms_buf_size = sm->tiw->index_interval + 2;
2251
2306
  sm->terms_buf = ALLOC_N(Term, sm->terms_buf_size);
@@ -2254,7 +2309,7 @@ void sm_merge_terms(SegmentMerger *sm)
2254
2309
  sm->terms_buf[i].text = ALLOC_N(char, MAX_WORD_SIZE);
2255
2310
  }
2256
2311
  sm->skip_interval = sm->tiw->skip_interval;
2257
- sm->queue = pq_create(sm->readers->size, &smi_lt);
2312
+ sm->queue = pq_create(sm->readers->size, (lt_ft)&smi_lt);
2258
2313
 
2259
2314
  sm_merge_term_infos(sm);
2260
2315
 
@@ -2308,11 +2363,13 @@ void sm_merge_vectors(SegmentMerger *sm)
2308
2363
  ir = sm->readers->elems[i];
2309
2364
  max_doc = ir->max_doc(ir);
2310
2365
  for (j = 0; j < max_doc; j++) {
2311
- // skip deleted docs
2366
+ /* skip deleted docs */
2312
2367
  if (! ir->is_deleted(ir, j)) {
2313
2368
  tvs = ir->get_term_vectors(ir, j);
2314
- tvw_add_all_doc_vectors(tvw, tvs);
2315
- ary_destroy(tvs);
2369
+ if (tvs) {
2370
+ tvw_add_all_doc_vectors(tvw, tvs);
2371
+ ary_destroy(tvs);
2372
+ }
2316
2373
  }
2317
2374
  }
2318
2375
  }
@@ -2333,7 +2390,7 @@ int sm_merge(SegmentMerger *sm)
2333
2390
 
2334
2391
  Array *sm_create_compound_file(SegmentMerger *sm, char *file_name)
2335
2392
  {
2336
- Array *files = ary_create(0, &efree);
2393
+ Array *files = ary_create(0, &free);
2337
2394
  CompoundWriter *cw = open_cw(sm->store, file_name);
2338
2395
  FieldInfo *fi;
2339
2396
  char fname[SEGMENT_NAME_MAX_LENGTH];
@@ -2344,7 +2401,7 @@ Array *sm_create_compound_file(SegmentMerger *sm, char *file_name)
2344
2401
  ary_append(files, estrdup(fname));
2345
2402
  }
2346
2403
 
2347
- // Field norm files
2404
+ /* Field norm files */
2348
2405
  for (i = 0; i < sm->fis->fcnt; i++) {
2349
2406
  fi = sm->fis->by_number[i];
2350
2407
  if (fi->is_indexed && !fi->omit_norms) {
@@ -2353,7 +2410,7 @@ Array *sm_create_compound_file(SegmentMerger *sm, char *file_name)
2353
2410
  }
2354
2411
  }
2355
2412
 
2356
- // Vector files
2413
+ /* Vector files */
2357
2414
  if (fis_has_vectors(sm->fis)) {
2358
2415
  for (i = 0; i < NELEMS(VECTOR_EXTENSIONS); i++) {
2359
2416
  sprintf(fname, "%s.%s", sm->name, VECTOR_EXTENSIONS[i]);
@@ -2361,12 +2418,12 @@ Array *sm_create_compound_file(SegmentMerger *sm, char *file_name)
2361
2418
  }
2362
2419
  }
2363
2420
 
2364
- // Now merge all added files
2421
+ /* Now merge all added files */
2365
2422
  for (i = 0; i < files->size; i++) {
2366
2423
  cw_add_file(cw, (char *)files->elems[i]);
2367
2424
  }
2368
2425
 
2369
- // Perform the merge
2426
+ /* Perform the merge */
2370
2427
  cw_close(cw);
2371
2428
 
2372
2429
  return files;
@@ -2386,11 +2443,11 @@ void ir_acquire_write_lock(IndexReader *ir)
2386
2443
 
2387
2444
  if (ir->write_lock == NULL) {
2388
2445
  ir->write_lock = ir->store->open_lock(ir->store, WRITE_LOCK_NAME);
2389
- if (!ir->write_lock->obtain(ir->write_lock)) // obtain write lock
2446
+ if (!ir->write_lock->obtain(ir->write_lock)) /* obtain write lock */
2390
2447
  RAISE(STATE_ERROR, WRITE_LOCK_ERROR_MSG);
2391
2448
 
2392
- // we have to check whether index has changed since this reader was opened.
2393
- // if so, this reader is no longer valid for deletion
2449
+ /* we have to check whether index has changed since this reader was opened.
2450
+ * if so, this reader is no longer valid for deletion */
2394
2451
  if (sis_read_current_version(ir->store) > ir->sis->version) {
2395
2452
  ir->is_stale = true;
2396
2453
  ir->write_lock->release(ir->write_lock);
@@ -2401,7 +2458,7 @@ void ir_acquire_write_lock(IndexReader *ir)
2401
2458
  }
2402
2459
  }
2403
2460
 
2404
- IndexReader *ir_create(Store *store, SegmentInfos *sis, int is_owner, int close_store)
2461
+ IndexReader *ir_create(Store *store, SegmentInfos *sis, int is_owner)
2405
2462
  {
2406
2463
  IndexReader *ir = ALLOC(IndexReader);
2407
2464
 
@@ -2414,7 +2471,6 @@ IndexReader *ir_create(Store *store, SegmentInfos *sis, int is_owner, int close_
2414
2471
  }
2415
2472
 
2416
2473
  ir->store = store;
2417
- ir->close_store = close_store;
2418
2474
  ir->sis = sis;
2419
2475
  ir->has_changes = false;
2420
2476
  ir->is_stale = false;
@@ -2424,7 +2480,11 @@ IndexReader *ir_create(Store *store, SegmentInfos *sis, int is_owner, int close_
2424
2480
  return ir;
2425
2481
  }
2426
2482
 
2427
- IndexReader *ir_open(Store *store, int close_store)
2483
+ /**
2484
+ * Will keep a reference to the store. To let this method delete the store
2485
+ * make sure you deref the store that you pass to it
2486
+ */
2487
+ IndexReader *ir_open(Store *store)
2428
2488
  {
2429
2489
  int i;
2430
2490
  IndexReader *ir;
@@ -2434,13 +2494,14 @@ IndexReader *ir_open(Store *store, int close_store)
2434
2494
  sis = sis_create();
2435
2495
  sis_read(sis, store);
2436
2496
  if (sis->scnt == 1) {
2437
- ir = sr_open(sis, 0, true, close_store);
2497
+ ir = sr_open(sis, 0, true);
2438
2498
  } else {
2439
2499
  IndexReader **readers = ALLOC_N(IndexReader *, sis->scnt);
2440
2500
  for (i = 0; i < sis->scnt; i++) {
2441
- readers[i] = sr_open(sis, i, false, false);
2501
+ readers[i] = sr_open(sis, i, false);
2442
2502
  }
2443
- ir = mr_open(store, sis, readers, sis->scnt, close_store);
2503
+ ref(store);
2504
+ ir = mr_open(store, sis, readers, sis->scnt);
2444
2505
  }
2445
2506
  mutex_unlock(&store->mutex);
2446
2507
  return ir;
@@ -2481,11 +2542,13 @@ void ir_delete_doc(IndexReader *ir, int doc_num)
2481
2542
  Document *ir_get_doc_with_term(IndexReader *ir, Term *term)
2482
2543
  {
2483
2544
  TermDocEnum *tde = ir_term_docs_for(ir, term);
2545
+ Document *doc = NULL;
2546
+
2484
2547
  if (!tde) return NULL;
2485
2548
 
2486
- Document *doc = NULL;
2487
- if (tde->next(tde))
2549
+ if (tde->next(tde)) {
2488
2550
  doc = ir->get_doc(ir, tde->doc_num(tde));
2551
+ }
2489
2552
  tde->close(tde);
2490
2553
  return doc;
2491
2554
  }
@@ -2508,11 +2571,13 @@ void ir_commit_internal(IndexReader *ir)
2508
2571
  {
2509
2572
  if (ir->has_changes) {
2510
2573
  if (ir->is_owner) {
2574
+ Lock *commit_lock;
2511
2575
 
2512
2576
  mutex_lock(&ir->store->mutex);
2513
- Lock *commit_lock = ir->store->open_lock(ir->store, COMMIT_LOCK_NAME);
2514
- if (!commit_lock->obtain(commit_lock)) // obtain write lock
2577
+ commit_lock = ir->store->open_lock(ir->store, COMMIT_LOCK_NAME);
2578
+ if (!commit_lock->obtain(commit_lock)) { /* obtain write lock */
2515
2579
  RAISE(STATE_ERROR, COMMIT_LOCK_ERROR_MSG);
2580
+ }
2516
2581
 
2517
2582
  ir->do_commit(ir);
2518
2583
  sis_write(ir->sis, ir->store);
@@ -2522,7 +2587,7 @@ void ir_commit_internal(IndexReader *ir)
2522
2587
  mutex_unlock(&ir->store->mutex);
2523
2588
 
2524
2589
  if (ir->write_lock != NULL) {
2525
- ir->write_lock->release(ir->write_lock); // release write lock
2590
+ ir->write_lock->release(ir->write_lock); /* release write lock */
2526
2591
  ir->store->close_lock(ir->write_lock);
2527
2592
  ir->write_lock = NULL;
2528
2593
  }
@@ -2545,9 +2610,7 @@ void ir_close(IndexReader *ir)
2545
2610
  mutex_lock(&ir->mutex);
2546
2611
  ir_commit_internal(ir);
2547
2612
  ir->do_close(ir);
2548
- if (ir->close_store) {
2549
- ir->store->close(ir->store);
2550
- }
2613
+ store_deref(ir->store);
2551
2614
  if (ir->is_owner) {
2552
2615
  sis_destroy(ir->sis);
2553
2616
  }
@@ -2562,12 +2625,6 @@ void ir_close(IndexReader *ir)
2562
2625
  free(ir);
2563
2626
  }
2564
2627
 
2565
- void ir_destroy(void *p)
2566
- {
2567
- IndexReader *ir = (IndexReader *)p;
2568
- ir_close(ir);
2569
- }
2570
-
2571
2628
  /**
2572
2629
  * Don't call this method if the cache already exists
2573
2630
  **/