ferret 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/term.c CHANGED
@@ -1,7 +1,7 @@
1
- #include <index.h>
1
+ #include "index.h"
2
+ #include "helper.h"
3
+ #include "hash.h"
2
4
  #include <string.h>
3
- #include <helper.h>
4
- #include <hash.h>
5
5
 
6
6
  static char * const FORMAT_VERSION_ERROR_MSG = "Unknown format version";
7
7
  static char * const TERM_ORDER_ERROR_MSG = "term out of order";
@@ -32,11 +32,10 @@ Term *term_create(const char *field, char *text)
32
32
  return t;
33
33
  }
34
34
 
35
- void term_destroy(void *p)
35
+ void term_destroy(Term *self)
36
36
  {
37
- Term *t = (Term *)p;
38
- free(t->text);
39
- free(t);
37
+ free(self->text);
38
+ free(self);
40
39
  }
41
40
 
42
41
  int term_cmp(void *t1, void *t2)
@@ -87,9 +86,9 @@ TermBuffer *tb_create()
87
86
  return tb;
88
87
  }
89
88
 
90
- void tb_destroy(void *p)
89
+ void tb_destroy(TermBuffer *tb)
91
90
  {
92
- free(p);
91
+ free(tb);
93
92
  }
94
93
 
95
94
  TermBuffer *tb_set_term(TermBuffer *tb, Term *t)
@@ -106,8 +105,9 @@ Term *tb_get_term(TermBuffer *tb)
106
105
 
107
106
  int tb_cmp(TermBuffer *tb1, TermBuffer *tb2)
108
107
  {
109
- int res = strcmp(tb1->field, tb2->field);
110
- if (res != 0) {
108
+ int res;
109
+ if ((tb1->field != tb2->field) &&
110
+ (0 != (res = strcmp(tb1->field, tb2->field)))) {
111
111
  return res;
112
112
  } else {
113
113
  return strcmp(tb1->text, tb2->text);
@@ -133,12 +133,13 @@ TermBuffer *tb_cpy(TermBuffer *tb1, TermBuffer *tb2)
133
133
 
134
134
  TermBuffer *tb_read(TermBuffer *tb, InStream *is, FieldInfos *fis)
135
135
  {
136
- int start = is_read_vint(is);
137
- int length = is_read_vint(is);
136
+ signed int fnum;
137
+ int start = (int)is_read_vint(is);
138
+ int length = (int)is_read_vint(is);
138
139
  int total_length = start + length;
139
140
  is_read_bytes(is, (uchar *)tb->text, start, length);
140
141
  tb->text[total_length] = '\0';
141
- int fnum = is_read_vint(is);
142
+ fnum = (signed int)is_read_vint(is);
142
143
  if (fnum < 0)
143
144
  tb->field = (char *)EMPTY_STRING;
144
145
  else
@@ -171,9 +172,9 @@ TermInfo *ti_set(TermInfo *ti, int doc_freq, int freq_pointer, int prox_pointer,
171
172
  return ti;
172
173
  }
173
174
 
174
- void ti_destroy(void *p)
175
+ void ti_destroy(TermInfo *ti)
175
176
  {
176
- free(p);
177
+ free(ti);
177
178
  }
178
179
 
179
180
  TermInfo *ti_cpy(TermInfo *ti, TermInfo *other)
@@ -208,13 +209,12 @@ TermEnum *te_create()
208
209
  return te;
209
210
  }
210
211
 
211
- void te_destroy(void *p)
212
+ void te_destroy(TermEnum *te)
212
213
  {
213
- TermEnum *te = (TermEnum *)p;
214
214
  tb_destroy(te->tb_curr);
215
215
  tb_destroy(te->tb_prev);
216
216
  ti_destroy(te->ti_curr);
217
- free(p);
217
+ free(te);
218
218
  }
219
219
 
220
220
  Term *te_get_term(TermEnum *te)
@@ -246,11 +246,12 @@ TermBuffer *te_skip_to(TermEnum *te, Term *t)
246
246
  *
247
247
  ****************************************************************************/
248
248
 
249
- #define GET_STE SegmentTermEnum *ste = (SegmentTermEnum *)te->data;
249
+ #define GET_STE SegmentTermEnum *ste = (SegmentTermEnum *)te->data
250
250
 
251
251
  TermBuffer *ste_next(TermEnum *te)
252
252
  {
253
253
  GET_STE;
254
+ TermInfo *ti;
254
255
  InStream *is = ste->is;
255
256
  ste->pos++;
256
257
  if (ste->pos > ste->size - 1) {
@@ -261,38 +262,43 @@ TermBuffer *ste_next(TermEnum *te)
261
262
  tb_cpy(te->tb_prev, te->tb_curr);
262
263
  tb_read(te->tb_curr, is, ste->fis);
263
264
 
264
- TermInfo *ti = te->ti_curr;
265
- ti->doc_freq = is_read_vint(is); // read doc freq
266
- ti->freq_pointer += is_read_vint(is); // read freq pointer
267
- ti->prox_pointer += is_read_vint(is); // read prox pointer
265
+ ti = te->ti_curr;
266
+ ti->doc_freq = (int)is_read_vint(is); /* read doc freq */
267
+ ti->freq_pointer += (int)is_read_vint(is);/* read freq pointer */
268
+ ti->prox_pointer += (int)is_read_vint(is);/* read prox pointer */
268
269
 
269
270
  if (ste->format == -1) {
270
- // just read skip_offset in order to increment file pointer
271
- // value is never used since skip_to is switched off
271
+ /* just read skip_offset in order to increment file pointer
272
+ * value is never used since skip_to is switched off */
272
273
  if (!ste->is_index) {
273
- if (ti->doc_freq > ste->format_m1skip_interval)
274
- ti->skip_offset = is_read_vint(is);
274
+ if (ti->doc_freq > ste->format_m1skip_interval) {
275
+ ti->skip_offset = (int)is_read_vint(is);
276
+ }
275
277
  }
276
278
  } else {
277
- if (ti->doc_freq >= ste->skip_interval)
278
- ti->skip_offset = is_read_vint(is);
279
+ if (ti->doc_freq >= ste->skip_interval) {
280
+ ti->skip_offset = (int)is_read_vint(is);
281
+ }
279
282
  }
280
283
 
281
- if (ste->is_index)
282
- ste->index_pointer += is_read_vint(is); // read index pointer
284
+ if (ste->is_index) {
285
+ ste->index_pointer += (int)is_read_vint(is); /* read index pointer */
286
+ }
283
287
 
284
288
  return te->tb_curr;
285
289
  }
286
290
 
287
291
  TermEnum *ste_clone(TermEnum *other_te);
292
+
288
293
  TermEnum *ste_allocate()
289
294
  {
290
295
  TermEnum *te = te_create();
296
+ SegmentTermEnum *ste;
297
+
291
298
  te->next = &ste_next;
292
299
  te->close = &ste_close;
293
300
  te->clone = &ste_clone;
294
- SegmentTermEnum *ste =
295
- ALLOC(SegmentTermEnum);
301
+ ste = ALLOC(SegmentTermEnum);
296
302
  te->data = ste;
297
303
  return te;
298
304
  }
@@ -302,6 +308,7 @@ TermEnum *ste_clone(TermEnum *other_te)
302
308
  SegmentTermEnum *other_ste = (SegmentTermEnum *)other_te->data;
303
309
  TermEnum *te = ste_allocate();
304
310
  SegmentTermEnum *ste = (SegmentTermEnum *)te->data;
311
+
305
312
  memcpy(ste, other_ste, sizeof(SegmentTermEnum));
306
313
  ste->is = is_clone(other_ste->is);
307
314
  tb_cpy(te->tb_curr, other_te->tb_curr);
@@ -323,6 +330,8 @@ TermEnum *ste_create(InStream *is, FieldInfos *fis, int is_index)
323
330
  {
324
331
  TermEnum *te = ste_allocate();
325
332
  GET_STE;
333
+ int first_int;
334
+
326
335
  ste->fis = fis;
327
336
  ste->is_index = is_index;
328
337
  ste->is = is;
@@ -330,36 +339,36 @@ TermEnum *ste_create(InStream *is, FieldInfos *fis, int is_index)
330
339
  ste->index_pointer = 0;
331
340
  ste->format_m1skip_interval = -1;
332
341
 
333
- int first_int = is_read_int(is);
342
+ first_int = (int)is_read_int(is);
334
343
 
335
344
  if (first_int >= 0) {
336
- // original-format file, without explicit format version number
345
+ /* original-format file, without explicit format version number */
337
346
  ste->format = 0;
338
347
  ste->size = first_int;
339
348
 
340
- // back-compatible settings
349
+ /* back-compatible settings */
341
350
  ste->index_interval = 128;
342
- ste->skip_interval = INT_MAX; // switch off skip_to optimization
351
+ ste->skip_interval = INT_MAX; /* switch off skip_to optimization */
343
352
 
344
353
  } else {
345
- // check that it is a format we can understand
354
+ /* check that it is a format we can understand */
346
355
  if (first_int < TERM_INFO_FORMAT)
347
- RAISE(ERROR, FORMAT_VERSION_ERROR_MSG);
356
+ RAISE(EXCEPTION, FORMAT_VERSION_ERROR_MSG);
348
357
 
349
- // we have a format version number
358
+ /* we have a format version number */
350
359
  ste->format = first_int;
351
360
 
352
361
 
353
- ste->size = is_read_long(is); // read the size
362
+ ste->size = (int)is_read_long(is); /* read the size */
354
363
 
355
364
  if (ste->format == -1) {
356
365
  if (!ste->is_index) {
357
366
  ste->index_interval = is_read_int(is);
358
367
  ste->format_m1skip_interval = is_read_int(is);
359
368
  }
360
- // switch off skip_to optimization for file format prior to
361
- // 1.4rc2 in order to avoid a bug in skip_to implementation
362
- // of these versions
369
+ /* switch off skip_to optimization for file format prior to
370
+ * 1.4rc2 in order to avoid a bug in skip_to implementation
371
+ * of these versions */
363
372
  ste->skip_interval = INT_MAX;
364
373
  } else {
365
374
  ste->index_interval = is_read_int(is);
@@ -407,7 +416,7 @@ Term *ste_scan_for_term(TermEnum *te, int pos)
407
416
  *
408
417
  ****************************************************************************/
409
418
 
410
- #define GET_MTE MultiTermEnum *mte = (MultiTermEnum *)te->data;
419
+ #define GET_MTE MultiTermEnum *mte = (MultiTermEnum *)te->data
411
420
 
412
421
  TermBuffer *mte_next(TermEnum *te)
413
422
  {
@@ -426,11 +435,11 @@ TermBuffer *mte_next(TermEnum *te)
426
435
 
427
436
  while ((top != NULL) && (tb_cmp(te->tb_curr, top->tb) == 0)) {
428
437
  pq_pop(mte->smi_queue);
429
- te->ti_curr->doc_freq += top->te->ti_curr->doc_freq; // increment freq
438
+ te->ti_curr->doc_freq += top->te->ti_curr->doc_freq;/* increment freq */
430
439
  if (smi_next(top)) {
431
- pq_push(mte->smi_queue, top); // restore queue
440
+ pq_push(mte->smi_queue, top); /* restore queue */
432
441
  } else {
433
- smi_destroy(top); // done with a segment
442
+ smi_destroy(top); /* done with a segment */
434
443
  }
435
444
  top = (SegmentMergeInfo *)pq_top(mte->smi_queue);
436
445
  }
@@ -455,21 +464,21 @@ TermEnum *mte_clone(TermEnum *te)
455
464
  TermEnum *mte_create(IndexReader **readers, int *starts, int rcnt, Term *t)
456
465
  {
457
466
  int i;
467
+ IndexReader *reader;
468
+ TermEnum *sub_te;
469
+ MultiTermEnum *mte = ALLOC(MultiTermEnum);
458
470
  TermEnum *te = te_create();
459
471
  te->next = &mte_next;
460
472
  te->clone = &mte_clone;
461
473
  te->close = &mte_close;
462
474
 
463
- MultiTermEnum *mte = ALLOC(MultiTermEnum);
464
475
  te->data = mte;
465
476
 
466
- IndexReader *reader;
467
- TermEnum *sub_te;
468
-
469
- mte->smi_queue = pq_create(rcnt, &smi_lt);
470
- mte->smi_queue->free_elem = &smi_destroy;
477
+ mte->smi_queue = pq_create(rcnt, (lt_ft)&smi_lt);
478
+ mte->smi_queue->free_elem = (free_ft)&smi_destroy;
471
479
 
472
480
  for (i = 0; i < rcnt; i++) {
481
+ SegmentMergeInfo *smi;
473
482
  reader = readers[i];
474
483
 
475
484
  if (t != NULL) {
@@ -478,10 +487,10 @@ TermEnum *mte_create(IndexReader **readers, int *starts, int rcnt, Term *t)
478
487
  sub_te = reader->terms(reader);
479
488
  }
480
489
 
481
- SegmentMergeInfo *smi = smi_create(starts[i], sub_te, reader);
490
+ smi = smi_create(starts[i], sub_te, reader);
482
491
  if (((t == NULL) && smi_next(smi)) ||
483
492
  (sub_te->tb_curr->field != (char *)EMPTY_STRING)) {
484
- pq_push(mte->smi_queue, smi); // initialize queue
493
+ pq_push(mte->smi_queue, smi); /* initialize queue */
485
494
  } else {
486
495
  smi_destroy(smi);
487
496
  }
@@ -508,7 +517,10 @@ TermInfosWriter *tiw_open_internal(Store *store,
508
517
  int interval,
509
518
  int is_index)
510
519
  {
520
+ char fname[SEGMENT_NAME_MAX_LENGTH];
511
521
  TermInfosWriter *tiw = ALLOC(TermInfosWriter);
522
+ OutStream *os;
523
+
512
524
  tiw->index_interval = interval;
513
525
  tiw->skip_interval = 16;
514
526
  tiw->last_index_pointer = 0;
@@ -520,14 +532,13 @@ TermInfosWriter *tiw_open_internal(Store *store,
520
532
  tiw->curr_field = NULL;
521
533
  tiw->curr_field_num = -1;
522
534
 
523
- char fname[SEGMENT_NAME_MAX_LENGTH];
524
535
  strcpy(fname, segment);
525
536
  strcat(fname, (is_index ? ".tii" : ".tis"));
526
- OutStream *os = tiw->os = store->create_output(store, fname);
527
- os_write_int(os, TERM_INFO_FORMAT); // write format
528
- os_write_long(os, 0); // leave space for size
529
- os_write_int(os, tiw->index_interval); // write index_interval
530
- os_write_int(os, tiw->skip_interval); // write skip_interval
537
+ os = tiw->os = store->create_output(store, fname);
538
+ os_write_int(os, TERM_INFO_FORMAT); /* write format */
539
+ os_write_long(os, 0); /* leave space for size */
540
+ os_write_int(os, tiw->index_interval); /* write index_interval */
541
+ os_write_int(os, tiw->skip_interval); /* write skip_interval */
531
542
  if (!is_index) {
532
543
  tiw->other = tiw_open_internal(store, segment, fis, interval, true);
533
544
  tiw->other->other = tiw;
@@ -544,11 +555,11 @@ void tiw_write_term(TermInfosWriter *tiw, OutStream *os, Term *t)
544
555
  {
545
556
  //printf("%s, %s\n", tiw->last_term->text, t->text);
546
557
  int start = hlp_string_diff(tiw->last_term->text, t->text);
547
- int length = strlen(t->text) - start;
558
+ int length = (int)strlen(t->text) - start;
548
559
 
549
- os_write_vint(os, start); // write shared prefix length
550
- os_write_vint(os, length); // write delta length
551
- os_write_chars(os, t->text, start, length); // write delta chars
560
+ os_write_vint(os, start); /* write shared prefix length */
561
+ os_write_vint(os, length); /* write delta length */
562
+ os_write_chars(os, t->text, start, length); /* write delta chars */
552
563
  if (tiw->curr_field != t->field) {
553
564
  tiw->curr_field = t->field;
554
565
  tiw->curr_field_num = fis_get_number(tiw->fis, t->field);
@@ -559,6 +570,7 @@ void tiw_write_term(TermInfosWriter *tiw, OutStream *os, Term *t)
559
570
 
560
571
  void tiw_add(TermInfosWriter *tiw, Term *t, TermInfo *ti)
561
572
  {
573
+ #ifdef DEBUG
562
574
  if (tiw->is_index && term_cmp(tiw->last_term, t) > 0) {
563
575
  RAISE(STATE_ERROR, TERM_ORDER_ERROR_MSG);
564
576
  }
@@ -568,22 +580,27 @@ void tiw_add(TermInfosWriter *tiw, Term *t, TermInfo *ti)
568
580
  if (ti->prox_pointer < tiw->last_term_info->prox_pointer) {
569
581
  RAISE(STATE_ERROR, PP_ORDER_ERROR_MSG);
570
582
  }
583
+ #endif
571
584
 
572
- if (!tiw->is_index && (tiw->size % tiw->index_interval) == 0)
573
- tiw_add(tiw->other, tiw->last_term, tiw->last_term_info); // add an index term
585
+ if (!tiw->is_index && (tiw->size % tiw->index_interval) == 0) {
586
+ /* add an index term */
587
+ tiw_add(tiw->other, tiw->last_term, tiw->last_term_info);
588
+ }
574
589
 
575
- tiw_write_term(tiw, tiw->os, t); // write term
576
- os_write_vint(tiw->os, ti->doc_freq); // write doc freq
590
+ tiw_write_term(tiw, tiw->os, t); /* write term */
591
+ os_write_vint(tiw->os, ti->doc_freq); /* write doc freq */
577
592
  os_write_vint(tiw->os, ti->freq_pointer - tiw->last_term_info->freq_pointer);
578
593
  os_write_vint(tiw->os, ti->prox_pointer - tiw->last_term_info->prox_pointer);
579
- if (ti->doc_freq >= tiw->skip_interval)
594
+
595
+ if (ti->doc_freq >= tiw->skip_interval) {
580
596
  os_write_vint(tiw->os, ti->skip_offset);
597
+ }
581
598
 
582
599
  if (tiw->is_index) {
583
600
  OutStream *other_os = tiw->other->os;
584
601
  int other_pos = os_pos(other_os);
585
602
  os_write_vint(tiw->os, other_pos - tiw->last_index_pointer);
586
- tiw->last_index_pointer = other_pos; // write pointer
603
+ tiw->last_index_pointer = other_pos; /* write pointer */
587
604
  }
588
605
 
589
606
  ti_cpy(tiw->last_term_info, ti);
@@ -593,7 +610,7 @@ void tiw_add(TermInfosWriter *tiw, Term *t, TermInfo *ti)
593
610
  void tiw_close(TermInfosWriter *tiw)
594
611
  {
595
612
  OutStream *os = tiw->os;
596
- os_seek(os, 4); // write @size after format
613
+ os_seek(os, 4); /* write @size after format */
597
614
  os_write_long(os, tiw->size);
598
615
  os_close(os);
599
616
 
@@ -632,17 +649,20 @@ void tir_close(TermInfosReader *tir)
632
649
 
633
650
  TermInfosReader *tir_open(Store *store, char *segment, FieldInfos *fis)
634
651
  {
652
+ SegmentTermEnum *ste;
635
653
  TermInfosReader *tir = ALLOC(TermInfosReader);
636
654
  char fname[SEGMENT_NAME_MAX_LENGTH];
655
+ InStream *is;
656
+
637
657
  mutex_init(&tir->mutex, NULL);
638
658
  strcpy(fname, segment);
639
659
  strcpy(fname + strlen(segment), ".tis");
640
- InStream *is = store->open_input(store, fname);
660
+ is = store->open_input(store, fname);
641
661
  tir->orig_te = ste_create(is, fis, false);
642
662
  thread_key_create(&tir->thread_te, NULL);
643
- tir->te_bucket = ary_create(1, (destroy_func_t)tir->orig_te->close);
663
+ tir->te_bucket = ary_create(1, (free_ft)tir->orig_te->close);
644
664
 
645
- SegmentTermEnum *ste = tir->orig_te->data;
665
+ ste = tir->orig_te->data;
646
666
  tir->size = ste->size;
647
667
  tir->skip_interval = ste->skip_interval;
648
668
 
@@ -659,6 +679,9 @@ void tir_ensure_index_is_read(TermInfosReader *tir)
659
679
  {
660
680
  mutex_lock(&tir->mutex);
661
681
  if (tir->index_terms == NULL) {
682
+ TermEnum *index_te;
683
+ SegmentTermEnum *ste;
684
+ int i = 0;
662
685
  int index_size = ((SegmentTermEnum *)tir->index_te->data)->size;
663
686
  tir->index_size = index_size;
664
687
 
@@ -666,9 +689,8 @@ void tir_ensure_index_is_read(TermInfosReader *tir)
666
689
  tir->index_term_infos = ALLOC_N(TermInfo *, index_size);
667
690
  tir->index_pointers = ALLOC_N(int, index_size);
668
691
 
669
- int i = 0;
670
- TermEnum *index_te = tir->index_te;
671
- SegmentTermEnum *ste = index_te->data;
692
+ index_te = tir->index_te;
693
+ ste = index_te->data;
672
694
 
673
695
  TRY
674
696
  while (ste_next(index_te) != NULL) {
@@ -708,7 +730,7 @@ void tir_seek_enum(TermInfosReader *tir, int ind_offset)
708
730
 
709
731
  int tir_get_index_offset(TermInfosReader *tir, Term *t)
710
732
  {
711
- int lo = 0; // binary search tir->index_terms[]
733
+ int lo = 0; /* binary search tir->index_terms[] */
712
734
  int hi = tir->index_size - 1;
713
735
  int mid, delta;
714
736
  Term **index_terms = tir->index_terms;
@@ -729,61 +751,70 @@ int tir_get_index_offset(TermInfosReader *tir, Term *t)
729
751
 
730
752
  TermInfo *tir_get_ti(TermInfosReader *tir, Term *t)
731
753
  {
732
- if (tir->size == 0)
754
+ TermEnum *te;
755
+ SegmentTermEnum *ste;
756
+ if (tir->size == 0) {
733
757
  return NULL;
758
+ }
734
759
 
735
760
  tir_ensure_index_is_read(tir);
736
761
 
737
- // optimize sequential access: first try scanning cached enum w/o seeking
738
- TermEnum *te = tir_enum(tir);
739
- SegmentTermEnum *ste = (SegmentTermEnum *)te->data;
762
+ /* optimize sequential access: first try scanning cached enum w/o seeking */
763
+ te = tir_enum(tir);
764
+ ste = (SegmentTermEnum *)te->data;
740
765
  if (ste->pos < ste->size && tb_term_cmp(te->tb_curr, t) <= 0) {
741
766
  SegmentTermEnum *ste = (SegmentTermEnum *)te->data;
742
767
  int enum_offset = (int)(ste->pos / ste->index_interval) + 1;
743
768
  if (tir->index_size == enum_offset ||
744
- term_cmp(t, tir->index_terms[enum_offset]) < 0) { // but before end of block
745
- return ste_scan_for_term_info(te, t); // no need to seek
769
+ term_cmp(t, tir->index_terms[enum_offset]) < 0) { /* but before end of block */
770
+ return ste_scan_for_term_info(te, t); /* no need to seek */
746
771
  }
747
772
  }
748
773
 
749
- // random-access: must seek
774
+ /* random-access: must seek */
750
775
  tir_seek_enum(tir, tir_get_index_offset(tir, t));
751
776
  return ste_scan_for_term_info(te, t);
752
777
  }
753
778
 
754
779
  Term *tir_get_term(TermInfosReader *tir, int pos)
755
780
  {
756
- if (tir->size == 0)
781
+ if (tir->size == 0) {
757
782
  return NULL;
783
+ } else {
784
+ TermEnum *te = tir_enum(tir);
785
+ SegmentTermEnum *ste = (SegmentTermEnum *)te->data;
786
+ if (pos >= ste->pos &&
787
+ pos < (ste->pos + ste->index_interval)) {
788
+ return ste_scan_for_term(te, pos); /* can avoid seek */
789
+ }
758
790
 
759
- TermEnum *te = tir_enum(tir);
760
- SegmentTermEnum *ste = (SegmentTermEnum *)te->data;
761
- if (pos >= ste->pos &&
762
- pos < (ste->pos + ste->index_interval)) {
763
- return ste_scan_for_term(te, pos); // can avoid seek
791
+ tir_seek_enum(tir, (int)(pos / ste->index_interval)); /* must seek */
792
+ return ste_scan_for_term(te, pos);
764
793
  }
765
-
766
- tir_seek_enum(tir, (int)(pos / ste->index_interval)); // must seek
767
- return ste_scan_for_term(te, pos);
768
794
  }
769
795
 
770
796
  int tir_get_term_pos(TermInfosReader *tir, Term *t)
771
797
  {
772
- if (tir->size == 0)
798
+ if (tir->size == 0) {
773
799
  return -1;
800
+ } else {
801
+ TermEnum *te;
802
+ int ind_offset;
803
+
804
+ tir_ensure_index_is_read(tir);
774
805
 
775
- tir_ensure_index_is_read(tir);
776
-
777
- int ind_offset = tir_get_index_offset(tir, t);
778
- tir_seek_enum(tir, ind_offset);
806
+ ind_offset = tir_get_index_offset(tir, t);
807
+ tir_seek_enum(tir, ind_offset);
779
808
 
780
- TermEnum *te = tir_enum(tir);
781
- while ((tb_term_cmp(te->tb_curr, t) < 0) && (ste_next(te) != NULL))
782
- ;
809
+ te = tir_enum(tir);
810
+ while ((tb_term_cmp(te->tb_curr, t) < 0) && (ste_next(te) != NULL)) {
811
+ }
783
812
 
784
- if (tb_term_cmp(te->tb_curr, t) == 0)
785
- return ((SegmentTermEnum *)te->data)->pos;
786
- else
787
- return -1;
813
+ if (tb_term_cmp(te->tb_curr, t) == 0) {
814
+ return ((SegmentTermEnum *)te->data)->pos;
815
+ } else {
816
+ return -1;
817
+ }
818
+ }
788
819
  }
789
820